Commits

opticall committed ba3c635

fixed initial blocking bug when dataset has outliers

Comments (0)

Files changed (1)

opticall/opticall.cpp

 }
 
 
+
+
+/// Drops the rows of `inputdata` whose Euclidean norm is far above the rest.
+///
+/// Each row is treated as one point. A row is excluded when its norm is
+/// greater than the mean row norm plus four times the (0,0) entry returned by
+/// sd_of_data for the column of norms (presumably the standard deviation of
+/// those norms -- confirm against sd_of_data). Every excluded row is also
+/// echoed to stdout.
+///
+/// @param inputdata    matrix to filter; it is not modified.
+/// @param exc_indices  receives the row indices (relative to `inputdata`) of
+///                     the excluded rows; indices are appended, so any
+///                     existing contents are kept.
+/// @return a new matrix holding only the retained rows, in their original
+///         order and with the same number of columns as `inputdata`.
+MatrixXd strip_outliers(const MatrixXd &inputdata, vector<int> &exc_indices) {
+
+	// With no rows there is nothing to filter, and reducing an empty matrix
+	// (mean() below) is not valid in Eigen, so hand the input straight back.
+	if (inputdata.rows() == 0)
+	{
+		return inputdata;
+	}
+
+	// Number of spread units above the mean beyond which a row is dropped.
+	const double outlier_sd_multiplier = 4.0;
+
+	// One norm per input row, stored as a single-column matrix.
+	MatrixXd distances = inputdata.rowwise().norm();
+
+	const double mean_dist = distances.mean();
+	const double sd_dist = sd_of_data(distances)(0,0);
+	const double cutoff = mean_dist + outlier_sd_multiplier*sd_dist;
+
+	// Indices of the rows that survive the filter, in input order.
+	vector<int> kept_indices;
+
+	for (int i = 0; i < distances.rows(); i++)
+	{
+		if ( distances(i,0) > cutoff )
+		{
+			exc_indices.push_back(i);
+			cout << "excluding " << inputdata.row(i) << endl;
+		}
+		else
+		{
+			kept_indices.push_back(i);
+		}
+	}
+
+	// Copy the surviving rows into a compact matrix.
+	const int kept_count = static_cast<int>(kept_indices.size());
+	MatrixXd num_data(kept_count, inputdata.cols());
+	for (int k = 0; k < kept_count; k++)
+	{
+		num_data.row(k) = inputdata.row(kept_indices[k]);
+	}
+
+	return num_data;
+}
+
+
+
 MatrixXd probs_with_nans(const MatrixXd &post_probs,const vector<int> &nan_indices, int original_size ) {
 
 
 	//char * output_prob_ext = ".probs";
 	//char * output_call_ext = ".calls";
 
-	string version = "0.1.3";
+	string version = "0.1.5";
 	cout << "opticall version " << version << endl;
 	cout << "thank you for choosing opticall for your genotyping needs" << endl;
 
 		sample = strip_nans(sample, sample_nan_inds);
 		cout << "NaNs removed: " << sample_nan_inds.size() << endl;
 		cout << "---------" << endl;
+		
+		vector<int> sample_outl_inds;
+		sample = strip_outliers(sample, sample_outl_inds);
+		
+		cout << "Outliers removed: " << sample_outl_inds.size() << endl;
+		cout << "---------" << endl;
+		
 		//cout << sample << endl;
 		sd_sample = sd_of_data(sample);
 		cout << "sd sample " << sd_sample << endl;