Anonymous avatar Anonymous committed 3206b05

[svn r423] Add single-channel MA normalization (off by default) to COALESCE

Comments (0)

Files changed (6)

proj/vs2008/SVMer/SVMer.vcproj

 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2DE}"
 			>
 			<File
-				RelativePath="..\..\..\tools\SVMer\cmdline.c"
-				>
-				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						PreprocessorDefinitions="_CRT_SECURE_NO_WARNINGS"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCLCompilerTool"
-						PreprocessorDefinitions="_CRT_SECURE_NO_WARNINGS"
-					/>
-				</FileConfiguration>
-			</File>
-			<File
 				RelativePath="..\..\..\../extlib\gengetopt-2.22\gl\getopt.c"
 				>
 				<FileConfiguration
 
 // CCoalesce
 
+void CCoalesceImpl::Normalize( CPCL& PCL ) {	// In place: for experiments holding no negative value, replace each non-NaN entry with log2( entry / per-gene 0.5 percentile ).
+	static const double	c_dLog2		= log( 2.0 );	// natural log of 2; dividing log( x ) by it yields log2( x )
+	vector<float>		vecdMedian;	// scratch buffer for one gene's non-NaN values across the selected experiments
+	vector<size_t>		veciSingle;	// indices of experiments in which no non-NaN value is below zero
+	size_t				i, j, k;
+	float				d, dMedian;
+
+	for( i = 0; i < PCL.GetExperiments( ); ++i ) {	// pass 1: decide which experiments to normalize
+		for( j = 0; j < PCL.GetGenes( ); ++j )
+			if( !CMeta::IsNaN( d = PCL.Get( j, i ) ) && ( d < 0 ) )	// a negative, non-NaN value disqualifies experiment i
+				break;
+		if( j >= PCL.GetGenes( ) )	// inner loop ran to completion, i.e. no negative value was found
+			veciSingle.push_back( i ); }
+	if( veciSingle.empty( ) )	// nothing qualified, so PCL is left untouched
+		return;
+
+	vecdMedian.resize( veciSingle.size( ) );	// at most one value per selected experiment is stored per gene
+	for( i = 0; i < PCL.GetGenes( ); ++i ) {	// pass 2: normalize gene by gene
+		for( j = k = 0; j < veciSingle.size( ); ++j )	// k counts the non-NaN values gathered for gene i
+			if( !CMeta::IsNaN( d = PCL.Get( i, veciSingle[ j ] ) ) )
+				vecdMedian[ k++ ] = d;
+		if( !k )	// gene i has only NaN values in the selected experiments
+			continue;
+		dMedian = (float)CStatistics::Percentile( vecdMedian.begin( ), vecdMedian.begin( ) + k, 0.5 );	// 0.5 percentile of the first k gathered values only
+		for( j = 0; j < veciSingle.size( ); ++j )
+			if( !CMeta::IsNaN( d = PCL.Get( i, k = veciSingle[ j ] ) ) )	// k is reused here as the experiment index; the count is no longer needed
+				PCL.Set( i, k, (float)( log( d / dMedian ) / c_dLog2 ) ); } }	// NOTE(review): d == 0 or dMedian == 0 is not guarded before the divide/log -- confirm whether such inputs can occur
+
 CCoalesceImpl::~CCoalesceImpl( ) {	// Destructor: delegates all work to Clear( ).
 
 	Clear( ); }
 	for( i = 0; i < m_vecpWiggles.size( ); ++i )
 		sModifiers.Add( m_vecpWiggles[ i ] );
 	PCLCopy.Open( PCL );
+	if( GetNormalize( ) )
+		Normalize( PCLCopy );
 	if( !( InitializeDatasets( PCLCopy ) && InitializeGeneScores( PCLCopy, FASTA, veciPCL2FASTA, sModifiers,
 		GeneScores ) ) )
 		return false;
 
 		m_iBasesPerMatch = iBasesPerMatch; }
 
-	const std::string& GetSequenceCache( ) const {
-
-		return m_strSequenceCache; }
-
-	void SetSequenceCache( const std::string& strSequenceCache ) {
-
-		m_strSequenceCache = strSequenceCache; }
-
 	float GetPValueMerge( ) const {	// Returns the current value of m_dPValueMerge.
 
 		return m_dPValueMerge; }
 	void ClearOutputIntermediate( ) {	// Empties the m_vecpostm container; only the container's elements are removed by this call.
 
 		m_vecpostm.clear( ); }
+
+	void SetNormalize( bool fNormalize ) {	// Stores fNormalize in m_fNormalize, the flag later read back through GetNormalize( ).
+
+		m_fNormalize = fNormalize; }
+
+	bool GetNormalize( ) const {	// Returns the current value of m_fNormalize.
+
+		return m_fNormalize; }
 };
 
 }
 	};
 
 	static void* ThreadCombineMotif( void* );
+	static void Normalize( CPCL& );
 
 	CCoalesceImpl( ) : m_iK(7), m_dPValueCorrelation(0.05f), m_iBins(12), m_dZScoreCondition(0.5f),
 		m_dProbabilityGene(0.95f), m_dZScoreMotif(0.5f), m_pMotifs(NULL), m_fMotifs(false),
 		m_iBasesPerMatch(5000), m_dPValueMerge(0.05f), m_dCutoffMerge(2.5f), m_iSizeMinimum(5),
 		m_iThreads(1), m_iSizeMerge(100), m_iSizeMaximum(1000), m_dPValueCondition(0.05f),
-		m_dPValueMotif(0.05f) { }
+		m_dPValueMotif(0.05f), m_fNormalize(false) { }
 	virtual ~CCoalesceImpl( );
 
 	void Clear( );
 	std::string						m_strDirectoryIntermediate;
 	CCoalesceMotifLibrary*			m_pMotifs;
 	bool							m_fMotifs;
+	bool							m_fNormalize;
 	size_t							m_iBasesPerMatch;
-	std::string						m_strSequenceCache;
 	std::vector<SCoalesceDataset>	m_vecsDatasets;
 	std::vector<const CFASTA*>		m_vecpWiggles;
 	std::vector<std::ostream*>		m_vecpostm;

tools/COALESCE/COALESCE.cpp

 	Coalesce.SetSizeMinimum( sArgs.size_minimum_arg );
 	Coalesce.SetSizeMerge( sArgs.size_merge_arg );
 	Coalesce.SetSizeMaximum( sArgs.size_maximum_arg );
+	Coalesce.SetNormalize( !!sArgs.normalize_flag );
 	Coalesce.SetThreads( sArgs.threads_arg );
 	if( sArgs.output_arg )
 		Coalesce.SetDirectoryIntermediate( sArgs.output_arg );
-	if( sArgs.cache_arg )
-		Coalesce.SetSequenceCache( sArgs.cache_arg );
 
 	vecpFASTAs.resize( sArgs.inputs_num );
 	for( i = 0; i < vecpFASTAs.size( ); ++i ) {

tools/COALESCE/COALESCE.ggo

 							int	default="2500"
 
 section "Miscellaneous"
-option	"cache"			e	"Cache file for sequence analysis"
-							string	typestr="filename"
+option	"normalize"		e	"Automatically detect/normalize single channel data"
+							flag	off
 option	"progressive"	O	"Generate output progressively"
 							flag	on
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.