Commits

Anonymous committed 76a37dc

Added two network normalization methods and an "absolute value" flag, and corrected the standard deviation calculation for paircount in Dat2Dab

  • Participants
  • Parent commits 88f9d12

Comments (0)

Files changed (4)

tools/Dat2Dab/Dat2Dab.cpp

 *****************************************************************************/
 #include "stdafx.h"
 #include "cmdline.h"
+#include <cmath>
 
 #include "statistics.h"
 #include "datapair.h"
 	
 	if( sArgs.normalizeNPone_flag )
 	  Dat.Normalize( CDat::ENormalizeMinMaxNPone );
+
+	if( sArgs.zero_flag || sArgs.dmissing_given ) {
+        for( i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( CMeta::IsNaN( Dat.Get( i, j ) ) ){
+                    if ( sArgs.zero_flag ){
+                        Dat.Set( i, j, 0 );
+                    }
+                    else{
+                        Dat.Set( i, j, sArgs.dmissing_arg );
+                    }
+                }
+            }
+        }
+    }
 	
-	if( sArgs.zero_flag || sArgs.dmissing_given )
-	  for( i = 0; i < Dat.GetGenes( ); ++i )
-	    for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
-	      if( CMeta::IsNaN( Dat.Get( i, j ) ) ){
-		if ( sArgs.zero_flag ){
-		  Dat.Set( i, j, 0 );
-		}
-		else{
-		  Dat.Set( i, j, sArgs.dmissing_arg );
-		}
-	      }
-	
-	if( sArgs.NegExp_flag ){
-	  float d;
-	  for( i = 0; i < Dat.GetGenes( ); ++i )
-	    for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ){
-	      if( CMeta::IsNaN( d=Dat.Get( i, j ) ) )
-		continue;
-	      Dat.Set( i, j, exp(-d) );
-	    } 
-	}
-	
+    if( sArgs.NegExp_flag ){
+        float d;
+        for( i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( CMeta::IsNaN( d=Dat.Get( i, j ) ) )
+                    continue;
+                Dat.Set( i, j, exp(-d) );
+            }
+        }
+    }
+
+    if( sArgs.abs_flag ){
+        float d;
+        for( i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( CMeta::IsNaN( d=Dat.Get( i, j ) ) )
+                    continue;
+                Dat.Set( i, j, abs(d) );
+            }
+        }
+    }
+
 	if( sArgs.flip_flag )
 		Dat.Invert( );
+
+    if( sArgs.normalizeDeg_flag ) {
+        size_t			    iCutoff;
+        float			    d;
+        vector<size_t>	    veciCounts;
+        vector<float>	    vecdTotals;
+
+        veciCounts.resize( Dat.GetGenes( ) );
+        fill( veciCounts.begin( ), veciCounts.end( ), 0 );
+        vecdTotals.resize( Dat.GetGenes( ) );
+        fill( vecdTotals.begin( ), vecdTotals.end( ), 0.0f );
+
+        for( iCutoff = i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
+                    if( !sArgs.cutoff_given || ( d >= sArgs.cutoff_arg ) ) {
+                        // d = abs (d);
+                        iCutoff++;
+                        veciCounts[ i ]++;
+                        veciCounts[ j ]++;
+                        vecdTotals[ i ] += d;
+                        vecdTotals[ j ] += d;
+                        d *= d;
+                    }
+                }
+            }
+        }
+
+        for( i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
+                    if( !sArgs.cutoff_given || ( d >= sArgs.cutoff_arg ) ) {
+                        d = d * veciCounts[ i ] / sqrt( vecdTotals[ i ] * vecdTotals[ j ] );
+                        Dat.Set( i, j, d );
+                    }
+                }
+            }
+        }
+    }
+
+    if( sArgs.normalizeLoc_flag ) {
+        size_t			    iCutoff;
+        float			    d, zi, zj;
+        vector<size_t>	    veciCounts;
+        vector<float>	    vecdTotals, vecdAvgs, vecdSquares;
+
+        veciCounts.resize( Dat.GetGenes( ) );
+        fill( veciCounts.begin( ), veciCounts.end( ), 0 );
+        vecdTotals.resize( Dat.GetGenes( ) );
+        fill( vecdTotals.begin( ), vecdTotals.end( ), 0.0f );
+        vecdAvgs.resize( Dat.GetGenes( ) );
+        fill( vecdAvgs.begin( ), vecdAvgs.end( ), 0.0f );
+        vecdSquares.resize( Dat.GetGenes( ) );
+        fill( vecdSquares.begin( ), vecdSquares.end( ), 0.0f );
+
+        for( iCutoff = i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
+                    if( !sArgs.cutoff_given || ( d >= sArgs.cutoff_arg ) ) {
+                        // d = abs (d);
+                        iCutoff++;
+                        veciCounts[ i ]++;
+                        veciCounts[ j ]++;
+                        vecdTotals[ i ] += d;
+                        vecdTotals[ j ] += d;
+
+                        d *= d;
+                        vecdSquares[ i ] += d;
+                        vecdSquares[ j ] += d;
+                    }
+                }
+            }
+        }
+
+        for( i = 0; i < vecdSquares.size( ); ++i ) {
+            d = vecdTotals[ i ] / veciCounts[ i ];
+            vecdAvgs[ i ] = d;
+            vecdSquares[ i ] = sqrt( ( vecdSquares[ i ] / veciCounts[ i ] ) - ( d * d ) );
+        }
+
+         for( i = 0; i < Dat.GetGenes( ); ++i ) {
+            for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+                if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
+                    if( !sArgs.cutoff_given || ( d >= sArgs.cutoff_arg ) ) {
+                        if( vecdSquares[ i ] == 0 ) { zi = 0; }
+                        else { zi = (d - vecdAvgs[ i ]) / vecdSquares[ i ]; }
+
+                        if( vecdSquares[ j ] == 0 ) { zj = 0; }
+                        else { zj = (d - vecdAvgs[ j ]) / vecdSquares[ j ]; }
+                        
+                        d = ( zi + zj ) / sqrt(2);
+                        Dat.Set( i, j, d );
+                    }
+                }
+            }
+        }
+    }
+
 	if( Genes.GetGenes( ) )
 		Dat.FilterGenes( Genes, CDat::EFilterInclude );
 	if( sArgs.genex_arg )
 			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
 				if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
 					if( !sArgs.cutoff_given || ( d >= sArgs.cutoff_arg ) ) {
+                        // d = abs (d);
 						dAve += d;
 						iCutoff++;
 						veciCounts[ i ]++;
 			cout << Dat.GetGene( i ) << '\t' << vecdTotals[ i ] << '\t' << veciCounts[ i ] << '\t' <<
 				vecdSquares[ i ] << endl;
 		return 0; }
+
 	if( sArgs.cutoff_given )
 		for( i = 0; i < Dat.GetGenes( ); ++i )
 			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
 				if( Dat.Get( i, j ) < sArgs.cutoff_arg )
 					Dat.Set( i, j, CMeta::GetNaN( ) );
+
 	if( sArgs.subsample_arg < 1 )
 		for( i = 0; i < Dat.GetGenes( ); ++i )
 			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )

tools/Dat2Dab/Dat2Dab.ggo

 section "Preprocessing"
 option	"flip"			f	"Calculate one minus values"
 							flag	off
+option	"abs"			B	"Calculate absolute values"
+							flag	off
 option	"normalize"		n	"Normalize to the range [0,1]"
 							flag	off
 option	"normalizeNPone"	w	"Normalize to the range [-1,1]"
 							flag	off
+option	"normalizeDeg"		j	"Normalize by incident node degrees"
+							flag	off
+option	"normalizeLoc"		k	"Normalize by local neighborhood"
+							flag	off
 option	"zscore"		z	"Convert values to z-scores"
 							flag	off
 option	"rank"			r	"Rank transform data"

tools/Dat2Dab/cmdline.c

   "  -q, --quant=filename     Input Quant file",
   "\nPreprocessing:",
   "  -f, --flip               Calculate one minus values  (default=off)",
+  "  -B, --abs                Calculate absolute values  (default=off)",
   "  -n, --normalize          Normalize to the range [0,1]  (default=off)",
   "  -w, --normalizeNPone     Normalize to the range [-1,1]  (default=off)",
+  "  -j, --normalizeDeg       Normalize by incident node degrees  (default=off)",
+  "  -k, --normalizeLoc       Normalize by local neighborhood  (default=off)",
   "  -z, --zscore             Convert values to z-scores  (default=off)",
   "  -r, --rank               Rank transform data  (default=off)",
   "  -a, --randomize          Randomize data  (default=off)",
   args_info->output_given = 0 ;
   args_info->quant_given = 0 ;
   args_info->flip_given = 0 ;
+  args_info->abs_given = 0 ;
   args_info->normalize_given = 0 ;
   args_info->normalizeNPone_given = 0 ;
+  args_info->normalizeDeg_given = 0 ;
+  args_info->normalizeLoc_given = 0 ;
   args_info->zscore_given = 0 ;
   args_info->rank_given = 0 ;
   args_info->randomize_given = 0 ;
   args_info->quant_arg = NULL;
   args_info->quant_orig = NULL;
   args_info->flip_flag = 0;
+  args_info->abs_flag = 0;
   args_info->normalize_flag = 0;
   args_info->normalizeNPone_flag = 0;
+  args_info->normalizeDeg_flag = 0;
+  args_info->normalizeLoc_flag = 0;
   args_info->zscore_flag = 0;
   args_info->rank_flag = 0;
   args_info->randomize_flag = 0;
   args_info->output_help = gengetopt_args_info_help[4] ;
   args_info->quant_help = gengetopt_args_info_help[5] ;
   args_info->flip_help = gengetopt_args_info_help[7] ;
-  args_info->normalize_help = gengetopt_args_info_help[8] ;
-  args_info->normalizeNPone_help = gengetopt_args_info_help[9] ;
-  args_info->zscore_help = gengetopt_args_info_help[10] ;
-  args_info->rank_help = gengetopt_args_info_help[11] ;
-  args_info->randomize_help = gengetopt_args_info_help[12] ;
-  args_info->NegExp_help = gengetopt_args_info_help[13] ;
-  args_info->genes_help = gengetopt_args_info_help[15] ;
-  args_info->genex_help = gengetopt_args_info_help[16] ;
-  args_info->genee_help = gengetopt_args_info_help[17] ;
-  args_info->edges_help = gengetopt_args_info_help[18] ;
-  args_info->exedges_help = gengetopt_args_info_help[19] ;
-  args_info->gexedges_help = gengetopt_args_info_help[20] ;
-  args_info->cutoff_help = gengetopt_args_info_help[21] ;
-  args_info->zero_help = gengetopt_args_info_help[22] ;
-  args_info->dval_help = gengetopt_args_info_help[23] ;
-  args_info->dmissing_help = gengetopt_args_info_help[24] ;
-  args_info->duplicates_help = gengetopt_args_info_help[25] ;
-  args_info->subsample_help = gengetopt_args_info_help[26] ;
-  args_info->lookup1_help = gengetopt_args_info_help[28] ;
-  args_info->lookup2_help = gengetopt_args_info_help[29] ;
-  args_info->lookups1_help = gengetopt_args_info_help[30] ;
-  args_info->lookups2_help = gengetopt_args_info_help[31] ;
-  args_info->genelist_help = gengetopt_args_info_help[32] ;
-  args_info->paircount_help = gengetopt_args_info_help[33] ;
-  args_info->ccoeff_help = gengetopt_args_info_help[34] ;
-  args_info->hubbiness_help = gengetopt_args_info_help[35] ;
-  args_info->mar_help = gengetopt_args_info_help[36] ;
-  args_info->remap_help = gengetopt_args_info_help[38] ;
-  args_info->table_help = gengetopt_args_info_help[39] ;
-  args_info->skip_help = gengetopt_args_info_help[40] ;
-  args_info->memmap_help = gengetopt_args_info_help[41] ;
-  args_info->random_help = gengetopt_args_info_help[42] ;
-  args_info->noise_help = gengetopt_args_info_help[43] ;
-  args_info->verbosity_help = gengetopt_args_info_help[44] ;
+  args_info->abs_help = gengetopt_args_info_help[8] ;
+  args_info->normalize_help = gengetopt_args_info_help[9] ;
+  args_info->normalizeNPone_help = gengetopt_args_info_help[10] ;
+  args_info->normalizeDeg_help = gengetopt_args_info_help[11] ;
+  args_info->normalizeLoc_help = gengetopt_args_info_help[12] ;
+  args_info->zscore_help = gengetopt_args_info_help[13] ;
+  args_info->rank_help = gengetopt_args_info_help[14] ;
+  args_info->randomize_help = gengetopt_args_info_help[15] ;
+  args_info->NegExp_help = gengetopt_args_info_help[16] ;
+  args_info->genes_help = gengetopt_args_info_help[18] ;
+  args_info->genex_help = gengetopt_args_info_help[19] ;
+  args_info->genee_help = gengetopt_args_info_help[20] ;
+  args_info->edges_help = gengetopt_args_info_help[21] ;
+  args_info->exedges_help = gengetopt_args_info_help[22] ;
+  args_info->gexedges_help = gengetopt_args_info_help[23] ;
+  args_info->cutoff_help = gengetopt_args_info_help[24] ;
+  args_info->zero_help = gengetopt_args_info_help[25] ;
+  args_info->dval_help = gengetopt_args_info_help[26] ;
+  args_info->dmissing_help = gengetopt_args_info_help[27] ;
+  args_info->duplicates_help = gengetopt_args_info_help[28] ;
+  args_info->subsample_help = gengetopt_args_info_help[29] ;
+  args_info->lookup1_help = gengetopt_args_info_help[31] ;
+  args_info->lookup2_help = gengetopt_args_info_help[32] ;
+  args_info->lookups1_help = gengetopt_args_info_help[33] ;
+  args_info->lookups2_help = gengetopt_args_info_help[34] ;
+  args_info->genelist_help = gengetopt_args_info_help[35] ;
+  args_info->paircount_help = gengetopt_args_info_help[36] ;
+  args_info->ccoeff_help = gengetopt_args_info_help[37] ;
+  args_info->hubbiness_help = gengetopt_args_info_help[38] ;
+  args_info->mar_help = gengetopt_args_info_help[39] ;
+  args_info->remap_help = gengetopt_args_info_help[41] ;
+  args_info->table_help = gengetopt_args_info_help[42] ;
+  args_info->skip_help = gengetopt_args_info_help[43] ;
+  args_info->memmap_help = gengetopt_args_info_help[44] ;
+  args_info->random_help = gengetopt_args_info_help[45] ;
+  args_info->noise_help = gengetopt_args_info_help[46] ;
+  args_info->verbosity_help = gengetopt_args_info_help[47] ;
   
 }
 
     write_into_file(outfile, "quant", args_info->quant_orig, 0);
   if (args_info->flip_given)
     write_into_file(outfile, "flip", 0, 0 );
+  if (args_info->abs_given)
+    write_into_file(outfile, "abs", 0, 0 );
   if (args_info->normalize_given)
     write_into_file(outfile, "normalize", 0, 0 );
   if (args_info->normalizeNPone_given)
     write_into_file(outfile, "normalizeNPone", 0, 0 );
+  if (args_info->normalizeDeg_given)
+    write_into_file(outfile, "normalizeDeg", 0, 0 );
+  if (args_info->normalizeLoc_given)
+    write_into_file(outfile, "normalizeLoc", 0, 0 );
   if (args_info->zscore_given)
     write_into_file(outfile, "zscore", 0, 0 );
   if (args_info->rank_given)
         { "output",	1, NULL, 'o' },
         { "quant",	1, NULL, 'q' },
         { "flip",	0, NULL, 'f' },
+        { "abs",	0, NULL, 'B' },
         { "normalize",	0, NULL, 'n' },
         { "normalizeNPone",	0, NULL, 'w' },
+        { "normalizeDeg",	0, NULL, 'j' },
+        { "normalizeLoc",	0, NULL, 'k' },
         { "zscore",	0, NULL, 'z' },
         { "rank",	0, NULL, 'r' },
         { "randomize",	0, NULL, 'a' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hi:o:q:fnwzraKg:G:D:e:x:X:c:ZV:M:du:l:L:t:T:EPCHJp:bs:mR:Nv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hi:o:q:fBnwjkzraKg:G:D:e:x:X:c:ZV:M:du:l:L:t:T:EPCHJp:bs:mR:Nv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'B':	/* Calculate absolute values.  */
+        
+        
+          if (update_arg((void *)&(args_info->abs_flag), 0, &(args_info->abs_given),
+              &(local_args_info.abs_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "abs", 'B',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'n':	/* Normalize to the range [0,1].  */
         
         
             goto failure;
         
           break;
+        case 'j':	/* Normalize by incident node degrees.  */
+        
+        
+          if (update_arg((void *)&(args_info->normalizeDeg_flag), 0, &(args_info->normalizeDeg_given),
+              &(local_args_info.normalizeDeg_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "normalizeDeg", 'j',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'k':	/* Normalize by local neighborhood.  */
+        
+        
+          if (update_arg((void *)&(args_info->normalizeLoc_flag), 0, &(args_info->normalizeLoc_given),
+              &(local_args_info.normalizeLoc_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "normalizeLoc", 'k',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'z':	/* Convert values to z-scores.  */
         
         

tools/Dat2Dab/cmdline.h

   const char *quant_help; /**< @brief Input Quant file help description.  */
   int flip_flag;	/**< @brief Calculate one minus values (default=off).  */
   const char *flip_help; /**< @brief Calculate one minus values help description.  */
+  int abs_flag;	/**< @brief Calculate absolute values (default=off).  */
+  const char *abs_help; /**< @brief Calculate absolute values help description.  */
   int normalize_flag;	/**< @brief Normalize to the range [0,1] (default=off).  */
   const char *normalize_help; /**< @brief Normalize to the range [0,1] help description.  */
   int normalizeNPone_flag;	/**< @brief Normalize to the range [-1,1] (default=off).  */
   const char *normalizeNPone_help; /**< @brief Normalize to the range [-1,1] help description.  */
+  int normalizeDeg_flag;	/**< @brief Normalize by incident node degrees (default=off).  */
+  const char *normalizeDeg_help; /**< @brief Normalize by incident node degrees help description.  */
+  int normalizeLoc_flag;	/**< @brief Normalize by local neighborhood (default=off).  */
+  const char *normalizeLoc_help; /**< @brief Normalize by local neighborhood help description.  */
   int zscore_flag;	/**< @brief Convert values to z-scores (default=off).  */
   const char *zscore_help; /**< @brief Convert values to z-scores help description.  */
   int rank_flag;	/**< @brief Rank transform data (default=off).  */
   unsigned int output_given ;	/**< @brief Whether output was given.  */
   unsigned int quant_given ;	/**< @brief Whether quant was given.  */
   unsigned int flip_given ;	/**< @brief Whether flip was given.  */
+  unsigned int abs_given ;	/**< @brief Whether abs was given.  */
   unsigned int normalize_given ;	/**< @brief Whether normalize was given.  */
   unsigned int normalizeNPone_given ;	/**< @brief Whether normalizeNPone was given.  */
+  unsigned int normalizeDeg_given ;	/**< @brief Whether normalizeDeg was given.  */
+  unsigned int normalizeLoc_given ;	/**< @brief Whether normalizeLoc was given.  */
   unsigned int zscore_given ;	/**< @brief Whether zscore was given.  */
   unsigned int rank_given ;	/**< @brief Whether rank was given.  */
   unsigned int randomize_given ;	/**< @brief Whether randomize was given.  */