Commits

opticall committed 76c438b

fixed bug with NAN rows

  • Participants
  • Parent commits ab76b4b

Comments (0)

Files changed (1)

File opticall/opticall.cpp

 
 	//cout << "num indices size " << num_indices.size() << endl;
 	
-	
+    
+	if (num_indices.size() == 0) {
+        cout << "EMPTY DATA - EMPTY!!!!" << endl;
+        //really shouldn't return this, probably better to throw an exception
+		return inputdata;
+	}
 
 
 	MatrixXd num_data(num_indices.size(),inputdata.cols());
 
 	//cout << "numeric data" << endl;
 	
-	if (num_indices.size() == 0) {
-		return num_data;
-	}
+
 	
 	//I could now look for outliers - and then use num_indices[i] as the exclusion
 	
 //run in the case that all the values are NaN
 MatrixXd probs_with_nans(int numclasses, vector<int> excluded_samples, int original_size ) {
 
-	cout << "original size - excluded " << original_size - excluded_samples.size() << endl;
+	//cout << "original size - excluded " << original_size - excluded_samples.size() << endl;
 	//cout << post_probs.rows() << endl;
 
 	MatrixXd post_probs_w_nans(original_size - excluded_samples.size(),numclasses);
 
 	//cout << post_probs_w_nans.rows() << " " << post_probs_w_nans.cols() << endl;
 
-	for (int i = 0; i < original_size; i++)
+	for (int i = 0; i < original_size - excluded_samples.size(); i++)
 	{
 		post_probs_w_nans.row(i) = MatrixXd::Zero(1, numclasses);
 		post_probs_w_nans(i,numclasses-1)=1;
 	}
 
-
+    //cout << "probs with NANs calculated" << endl;
+    
 
 	return post_probs_w_nans;
 }
 	//char * output_prob_ext = ".probs";
 	//char * output_call_ext = ".calls";
 
-	string version = "0.5.5";
+	string version = "0.6.1";
 	cout << "opticall version " << version << endl;
 	cout << "thank you for choosing opticall for your genotyping needs" << endl;
 
 	customcovars.push_back(Matrix<double, 2, 2>::Identity() * 100	);
 
 	MatrixXd sd_sample;
+    
+    bool successful_block = false;
 
 	/*******SAMPLING & Block step*******/
 	/********************************/
 		MatrixXi dummy_genders = MatrixXi::Zero(sample.size(), 1);;
 		MatrixXi dummy_batches = MatrixXi::Zero(sample.size(), 1);;
  		sample = strip_nans(sample, sample_nan_inds,nonorm,outlier_range,true, dummy_outliers, dummy_excluded_samples,dummy_genders, dummy_batches);
-		cout << "NaNs & outliers removed: " << sample_nan_inds.size() << endl;
+		
+        
+        if (sample.rows() == sample_nan_inds.size())
+        {
+            cout << "Only NANs found while creating prior information" << endl;
+            continue;
+        }
+        
+        
+        
+        cout << "NaNs & outliers removed: " << sample_nan_inds.size() << endl;
 		cout << "---------" << endl;
 		
 		//vector<int> sample_outl_inds;
 		if (   ! (isnan(blockmus.sum()) || isinf(blockmus.sum()) || blockjs.row(0).sum() == 0 || blockjs.row(1).sum() == 0 || blockjs.row(2).sum() == 0 )  )
 		{
 			cout << "prior information created successfully" << endl;
+            successful_block = true;
 			break;
 		}
 
 	} while (blockattempts <= 5);
 
-	if (  isnan(blockmus.sum()) || isinf(blockmus.sum()) || blockjs.row(0).sum() == 0 || blockjs.row(1).sum() == 0 || blockjs.row(2).sum() == 0   )
+	if ( ! successful_block || isnan(blockmus.sum()) || isinf(blockmus.sum()) || blockjs.row(0).sum() == 0 || blockjs.row(1).sum() == 0 || blockjs.row(2).sum() == 0   )
 	{
 		cout << "could not create priors from data following multiple attempts. Please check dataset & maybe try again. Exiting..." << endl;
 		exit(0);
         cout << "calling SNP: " << snpinfo2[l][0] << endl;
         
 		//int_data = strip_nans(int_data, nan_samples,nonorm,outlier_range,false,initial_outliers,excluded_samples,snp_genders);
+        
+        //cout << "int data size is " << int_data.rows() << endl;
+        
         int_data = strip_nans(int_data, nan_samples,true,outlier_range,false,initial_outliers,excluded_samples,snp_genders, snp_batches);
 		
         
         vector<int> line_mm_removed_indices;
         MatrixXd inference_int_data = int_data;
         MatrixXi inference_snp_genders = snp_genders;
-        if (!nonorm)
+        if (!nonorm && nan_samples.size() + excluded_samples.size() != data_count)
         {
             inference_int_data = strip_from_line_inference(int_data, line_mm_removed_indices, outlier_range, inference_snp_genders);
         }
+        else{
+            cout << "Caught empty data" << endl;
+        }
         
 		clock_t linetStart = clock();
 
 
-		if (nan_samples.size() == data_count) {
+		if (nan_samples.size() + excluded_samples.size() == data_count) {
 			//all the data for this SNP is NAN
 			cout << "all intensity data for snp is NaN, calling everything unknown"  << endl;;
 			MatrixXd post_probs = probs_with_nans(4, excluded_samples ,int_data_w_nans.rows() );