1. Mathieu Lajoie
  2. RED2

Commits

Mathieu Lajoie  committed 17a5547

removed some files

  • Participants
  • Parent commits c0fc4fa
  • Branches default

Comments (0)

Files changed (5)

File SOFT/java/DrawPics.java

View file
 	for(int i = 2; i < data.length; i++){ //DO NOT USE FIRST TWO ENTRIES
 	    double val = data[i];			
 	    int bin = (int)(((val-min)/range)*nb_bins);
-	    //System.out.println(bin);			
-	    //System.out.println(val+" "+bin);
 	    if(bin==nb_bins){
 		bin--;
 	    }
 	}
 	for(int i = 0 ; i < array.length; i++){
 	    double norm_val = (array[i]-min)/range;
-	    //System.out.println(array[i]+" "+norm_val);
 	    array[i]=norm_val;
 	}
 		

File SOFT/src/main_GO.cpp

-using namespace std;
-
-#include "data_def.h"
-#include "read.h"
-#include "print.h"
-#include "stats.h"
-#include <getopt.h>
-#include "GO_extract.h"
-#include "motif_extract.h"
-
-using namespace std;
-
-//THIS PROGRAM IS NOT COMPLETE YET
-//For the moment, it is only used to test the GO_extract class
-//TO DO : a practical tool to measure GO terms enrichment associated to a motif
-
-
-
-int main (int argc, char** argv) {
-
-  string go_file = "DATA/GO/gene_ontology_ext.obo";
-  string annot_file = "DATA/GO/gene_association.sgd";
-  string seq_file = "DATA/SEQ/YEAST/yeast_seq_fire_5883.fasta";
-
-  String1D name_vec;
-  String1D seq_vec;
-  if(read_fasta(seq_file, name_vec, seq_vec)<1){
-    cerr<<"No sequences loaded"<<ENDL;
-    exit(0);
-  };
-
-  int W = 7;
-  Motif_extract me(seq_vec,name_vec,W,600,false,true);
-  GO_extract go(go_file,annot_file);
-  
-  go.set_refset(name_vec);
-  
-  StringSet subset;
-  cout<<me.get_positive_genes("GATAAG",subset)<<" sequences contain the motif"<<ENDL;  
-  go.eval_subset(subset);
-  cout<<ENDL;
-  cout<<"Best GO terms :"<<ENDL;
-  cout<<ENDL;
-  string res =  go.get_most_significant_go_terms(10);
-  cout<<res<<ENDL;
-
-
-  cout<<ENDL;
-  cout<<"*THIS PROGRAM IS NOT COMPLETE YET"<<ENDL;
-  cout<<"*For the moment, it is only used to test the GO_extract class"<<ENDL;
-
-  return SUCCESS;
-}  

File SOFT/src/main_clean_seq.cpp

-using namespace std;
-
-//#define DEBUG=1
-#include "read.h"
-#include "data_def.h"
-#include "distances.h"
-#include "word.h"
-#include "print.h"
-#include <limits>
-
-using namespace std;
-
-//TO DO
-//use to format seq,lower case etc....
-
-#ifndef MAIN
-#define MAIN
-
-typedef unsigned long ulong; 
-typedef unsigned int usint; 
-
-struct Occ_gene{
-  int gene;
-  usint pos;
-  ulong kmer;
-  int nb_occ;
-};
-
-bool comp_Occ_gene(const Occ_gene& i, const Occ_gene& j){
-  if(i.kmer==j.kmer){
-    return i.gene<j.gene;
-  }
-  return i.kmer<j.kmer;
-}
-
-bool comp_Occ_gene_nb(const Occ_gene& i, const Occ_gene& j){
-  if(i.nb_occ!=j.nb_occ){
-    return i.nb_occ<j.nb_occ;
-  }
-    return i.kmer<j.kmer;
-}
-
-typedef vector<Occ_gene> Occ_gene1D;
-
-void create_index_DNA(int w, Occ_gene1D& occ_vec, Seq1D& seq_vec);
-void create_index_DNA(int w, Int2D& index_table, Seq1D& seq_vec);
-int remove_dup(Occ_gene1D& occ_vec, Seq1D& seq_vec,Int1D& removed_seq,int max_len);
-int align(string& s1, string& s2, int pos1, int pos2, int len, int& match_pos);
-
-
-int main (int argc, char** argv) {
-
-  //std::cout << std::numeric_limits<unsigned long int>::max();
-  //exit(0);
-
-  char option_char;
-
-  string seq_file;
-  string expression_file;
-  double threshold = 1;
-  int W = 15;
-  int max_align_len = 15;
-
-  int min_seq_size = 50;
-  int max_seq_size = 5000;
-  
-  while ((option_char = getopt (argc, argv, "s:e:T:W:")) != -1){
-    switch (option_char)
-      {
-      case 's': seq_file = optarg; break;
-      case 'e': expression_file = optarg; break;
-      case 'T': threshold = atof(optarg); break;
-      case 'W': W = atoi(optarg); break;
-	  
-      default: 
-	cout<<ENDL;
-	cout<<"Usage : main_clean_seq ";
-	cout<<"-s seq_file [-e expression_file] [-T threshold] [-W word_size]"<<ENDL;
-	exit(0);
-      }
-  }
-
-  if(seq_file==""){
-    cout<<ENDL;
-    cout<<"Usage : main_clean_seq ";
-    cout<<"-s seq_file [-e expression_file] [-T threshold] [-W word_size]"<<ENDL;
-    exit(0);
-  }
-
-  //////////// PARAM INFO /////////////////
-  string cmdl = ""; //CMDLINE
-  for( int i = 0; i < argc; i++ ){
-      cmdl += (string) argv[i] + " ";
-  }
-  
-  string date = "";//get_date();
-  string info = "#"+cmdl+ENDL+"#"+date;
-  ////////////////////////////////
-  
-  Seq1D seq_vec;
-  read_fasta(seq_file, seq_vec);
-  sort(seq_vec.begin(),seq_vec.end(),comp_Seq_size_rev);
-  int nb_seq = seq_vec.size();
-
-  //bool use_gene_set = false;
-  StringSet gene_set;
-  if(expression_file!=""){
-    read_gene_set(expression_file, gene_set);
-    //use_gene_set = true;
-  } 
-
-  cout<<"#Keeping "<<nb_seq<<" sequences (min length = "
-      <<min_seq_size<<" , max length = "<<max_seq_size<<")"<<ENDL;  
-
-  Occ_gene1D occ_vec;
-
-  //int max_len = W;
-  int nb_removed = 0;
-  W = min(15,W);
-
-  create_index_DNA(W,occ_vec,seq_vec);
-
-  cout<<"#Sorting occ"<<ENDL;
-  sort(occ_vec.begin(),occ_vec.end(),comp_Occ_gene);
-  int nb_occ = (int) occ_vec.size();
-  for(int i = 1; i<nb_occ;i++){
-    if(occ_vec[i].kmer==occ_vec[i-1].kmer){
-      occ_vec[i].nb_occ=occ_vec[i-1].nb_occ+1;
-    }
-  }
-  for(int i = nb_occ-1; i>0;i--){
-    if(occ_vec[i-1].kmer==occ_vec[i].kmer){
-      occ_vec[i-1].nb_occ = occ_vec[i].nb_occ;
-    }
-  }
-  
-  cout<<"#Removing occ"<<ENDL;
-
-  int new_slot = 0;
-  for(int i = 0; i<nb_occ;i++){
-    //if(occ_vec[i].nb_occ>1 && occ_vec[i].nb_occ<50){
-   if(occ_vec[i].nb_occ>1){
-      //if(1){
-      //keep
-      if(new_slot<i){
-	occ_vec[new_slot] = occ_vec[i];
-      }
-      new_slot++;
-    }//end keep
-  }
-  occ_vec.resize(new_slot);
-  
-  cout<<"#"<<(double)new_slot/nb_occ<<ENDL;
-
-  nb_occ = (int) occ_vec.size();
-  //sort(occ_vec.begin(),occ_vec.end(),comp_Occ_gene_nb);
-  sort(occ_vec.begin(),occ_vec.end(),comp_Occ_gene);
-
-  string outfilename = "t";
-  ofstream outfile(outfilename.data(), ios::out);
-
-  static Int1D removed_seq; 
-  for(int i = 49; i<50; i+= 1){
-    //for(int i = 25; i<26; i+= 1){    
-    removed_seq.resize(0);
-    removed_seq.resize(seq_vec.size(),0);
-    int nb_r = remove_dup(occ_vec,seq_vec,removed_seq,i);
-    cout<<i<<" "<<nb_r<<ENDL;
-    outfile<<i<<" "<<nb_r<<ENDL;
-  }
-
-  outfile.close(); 
-  outfilename = seq_file+".clean";
-  ofstream outfile2(outfilename.data(), ios::out);
-  outfile2<<"#"<<ENDL;
-  int nb_keep = 0;
-  nb_removed = 0;
-  for(int i = 0 ; i < nb_seq; i++){    
-    if(removed_seq[i]<1){    
-      outfile2<<">"<<seq_vec[i].name<<ENDL;
-      outfile2<<seq_vec[i].seq<<ENDL;
-      nb_keep++;
-      }
-    else{
-      nb_removed++;
-    }
-  }
-  outfile2.close();
-  
-  cerr<<nb_removed<<" removed sequence(s)"<<ENDL;
-  cerr<<nb_seq-nb_removed<< " sequence(s) left"<<ENDL;
-  
-  return 1;
-}  
-
-/*
-int remove_dup(Occ_gene1D& occ_vec,Seq1D& seq_vec,Int1D& removed_seq,int max_len){
-  
-  int nb_removed = 0;
-  int nb_occ = occ_vec.size();
-
-  for(int i = 0 ; i < nb_occ; i++){
-    Occ_gene& o1 = occ_vec[i];
-    int j = i+1;
-    while(occ_vec[i].kmer == occ_vec[j].kmer){     
-      Occ_gene& o2 = occ_vec[j];
-      if(removed_seq[o2.gene]<1 && occ_vec[i].gene != occ_vec[j].gene){
-	int match_pos = 0;
-	int len = align(seq_vec[o1.gene].seq, seq_vec[o2.gene].seq, o1.pos, o2.pos, max_len, match_pos);
-	if(len>=max_len){	  
-	  removed_seq[o2.gene]++;
-	  nb_removed++;
-	  //dupfile<<seq_vec[o2.gene].name<<" "<<seq_vec[o1.gene].name<<" "<<seq_vec[o1.gene].seq.substr(match_pos,max_len)<<ENDL;
-	}
-      }
-      j++;
-    }
-  }
-  return nb_removed;
-}
-*/
-
-int remove_dup(Occ_gene1D& occ_vec,Seq1D& seq_vec,Int1D& removed_seq,int max_len){
-  
-  int nb_removed = 0;
-  int nb_occ = occ_vec.size();
-
-  string filename = "dup"+toString(max_len);
-
-  ofstream dupfile( filename.data(),ios::out);
-  dupfile<<"#List of duplicated kmers"<<ENDL; 
-  dupfile<<"#match threshold : "<<max_len<< "bp"<<ENDL;
-  dupfile<<"#[seq1] [seq2] [match]"<<ENDL;
-
-  for(int i = 0 ; i < nb_occ; i++){
-    Occ_gene& o1 = occ_vec[i];
-    int j = i+1;
-    while(j<nb_occ && occ_vec[i].kmer == occ_vec[j].kmer && removed_seq[o1.gene]==0 ){     
-      Occ_gene& o2 = occ_vec[j];
-      if(o1.gene != o2.gene){
-	int match_pos = 0;
-	int len = align(seq_vec[o1.gene].seq, seq_vec[o2.gene].seq, o1.pos, o2.pos, max_len, match_pos);
-	if(len>=max_len){	  
-	  removed_seq[o1.gene]++;
-	  nb_removed++;
-	  dupfile<<seq_vec[o2.gene].name<<" "<<seq_vec[o1.gene].name<<" "<<seq_vec[o1.gene].seq.substr(match_pos,max_len)<<ENDL;
-	}
-      }
-      j++;
-    }
-  }
-  dupfile.close();
-  return nb_removed;
-}
-
-int align(string& s1, string& s2, int pos1, int pos2, int len, int& match_pos){
-  int s1_size = s1.size(); 
-  int s2_size = s2.size();
-  int b1=pos1;int b2=pos2;
-  int e1=pos1;int e2=pos2;
-  //ALIGN BACKWARD
-  while(b1>=0 && b2>=0 && s1[b1]==s2[b2] && e1-b1+1 <= len && s1[b1]!='N'){
-    b1--;b2--;
-  }
-  b1++;b2++;
-  //ALIGN FORWARD
-  while(e1<s1_size && e2<s2_size && s1[e1]==s2[e2] && e1-b1+1 <= len && s1[e1]!='N'){
-    e1++;e2++;
-  }
-  e1--;e2--;
-  match_pos = b1;
-  return e1-b1+1; //ALIGNMENT LENGTH
-}
-
-void create_index_DNA(int w, Occ_gene1D& occ_vec, Seq1D& seq_vec){
-
-  //StringSet set;
-  int reserve_size = seq_vec.size()*seq_vec[0].seq.size();
-  occ_vec.reserve(reserve_size);
-
-  ulong MAX_INDEX = pow(4,w);
-  //cout<<"max index = "<<MAX_INDEX<<ENDL;
-  //cout<<"nb seq = "<<seq_vec.size()<<ENDL;
-
-  ulong index = 0;
-
-  int bad_pos = w;
-  int start_j = 0;
-  int end_j = 0;
-  int nb_genes = seq_vec.size();
-
-  for(int gene = 0; gene < nb_genes; gene++){
-    progression("Creating index ",gene,nb_genes);     
-    string& seq = seq_vec[gene].seq; 
-    end_j = seq.size();
-    bad_pos = w;
-
-    for(int j = start_j; j < end_j; j++){	//TO DO ! MAX DIST
-	index = (index*4)%(MAX_INDEX);	
-	switch (seq[j]){
-	case 'A': index+=0;bad_pos--;break;
-	case 'C': index+=1;bad_pos--;break;
-	case 'G': index+=2;bad_pos--;break;
-	case 'T': index+=3;bad_pos--;break;
-	case 'a': index+=0;bad_pos--;seq.replace(j,1,1,'A');break;
-	case 'c': index+=1;bad_pos--;seq.replace(j,1,1,'C');break;
-	case 'g': index+=2;bad_pos--;seq.replace(j,1,1,'G');break;
-	case 't': index+=3;bad_pos--;seq.replace(j,1,1,'T');break;  
-	default: bad_pos=w;break;
-	}	
-	if(bad_pos<=0){
-	  Occ_gene occ;	 
-	  occ.pos = j;
-	  occ.gene = gene;
-	  occ.kmer = index;
-	  occ.nb_occ = 1;
-	  occ_vec.push_back(occ); 
-	  //set.insert(seq.substr(j-w+1,w));
-	}
-      }
-  }//end for gene  
-
-  cout<<"Index size : "<<occ_vec.size()<<endl;
-}
-
-void create_index_DNA(int w, Int2D& index_table, Seq1D& seq_vec){
-
-  ulong MAX_INDEX = pow(4,w);
-  index_table.resize(MAX_INDEX+1);
-  int index_size=0;
-
-  ulong index = 0;
-
-  int bad_pos = w;
-  int start_j = 0;
-  int end_j = 0;
-  int nb_genes = seq_vec.size();
-
-  for(int gene = 0; gene < nb_genes; gene++){
-    progression("Creating index ",gene,nb_genes);     
-    string& seq = seq_vec[gene].seq; 
-    end_j = seq.size();
-    bad_pos = w;
-
-    for(int j = start_j; j < end_j; j++){	//TO DO ! MAX DIST
-	index = (index*4)%(MAX_INDEX);	
-	switch (seq[j]){
-	case 'A': index+=0;bad_pos--;break;
-	case 'C': index+=1;bad_pos--;break;
-	case 'G': index+=2;bad_pos--;break;
-	case 'T': index+=3;bad_pos--;break;
-	case 'a': index+=0;bad_pos--;seq.replace(j,1,1,'A');break;
-	case 'c': index+=1;bad_pos--;seq.replace(j,1,1,'C');break;
-	case 'g': index+=2;bad_pos--;seq.replace(j,1,1,'G');break;
-	case 't': index+=3;bad_pos--;seq.replace(j,1,1,'T');break;  
-	default: bad_pos=w;break;
-	}	
-	if(bad_pos<=0){
-	  index_table[index].push_back(gene);
-	  index_size++;
-	}
-      }
-  }//end for gene  
-
-  cout<<"Index size : "<<index_size<<endl;
-}
-#endif

File SOFT/src/main_compare.cpp

-using namespace std;
-
-//#define DEBUG=1
-#include "read.h"
-#include "PWM_db.h"
-#include "data_def.h"
-#include "distances.h"
-#include "word.h"
-#include "print.h"
-#include <limits>
-
-// CLEAN THIS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-using namespace std;
-
-int main (int argc, char** argv) {
-
-
-  char option_char;
-
-  string matrix_file = "DATA/MOTIFS/jaspar.matrix"; //-x
-  string motif_file = ""; //-m
-  //double E_threshold = 0.1; //-E
-  //double T_threshold = 0.8; //-C
-  //double FDR_threshold = 0.1; //-C
-  //double db_subset_ratio = 1; //just for test
-  //int rand_size = 100000; 
-
-  ///////////////////////////////////////////////////////
-  srand( 0 );
-  //////////////////////////////////////////////////////
-
-  string out_path = "";
-
-  bool RANDOM_TEST = false;
-
-  while ((option_char = getopt (argc, argv, "m:x:o:E:T:F:RS:N:")) != -1){
-    switch (option_char)
-      {
-      case 'm': motif_file = optarg; break;
-      case 'x': matrix_file = optarg; break;
-      case 'o': out_path = optarg; break;
-	//case 'E': E_threshold = atof(optarg); break;
-	//case 'T': T_threshold = atof(optarg); break;
-	//case 'F': FDR_threshold = atof(optarg); break;
-	//case 'S': db_subset_ratio = atof(optarg); break;
-      case 'R': RANDOM_TEST = true; break;
-	//case 'N': rand_size = atoi(optarg); break;
-
-      default: 
-	cout<<ENDL;
-	cout<<"Usage : red2_compare -m motifs_file -x database_file [-o output_path]"<<ENDL;
-	//cout<<"-s seq_file -e expression_file [-T threshold] [-W word_size]"<<ENDL;
-	exit(0);
-      }
-  }
-
- 
-  //int max_pwm_len = 99;
-  int max_pwm_len = 99;
-
-  // DATABASE/////////////////////////////////////////
-  //PWM_db db(matrix_file, rand_size, max_pwm_len); 
-  PWM_db db(matrix_file); 
-  cout<<"# "<<db.size()<<" PWMs loaded in database"<<ENDL;
-  //db.random_subset(db_subset_ratio);
-  //db.print_db("scerTF_rand_25");
-
-  db.trim_matrices(max_pwm_len);
-
-  // QUERY PWM_VEC /////////////////////////////////
-  PWM1D pwm_vec;
-  int return_code = load_PWMs(motif_file,pwm_vec); 
-  int nb_motifs = pwm_vec.size();
-  cout<<"# "<<nb_motifs<<" motifs loaded"<<ENDL;
-  db.trim_matrices(max_pwm_len,pwm_vec);
-
-  if(return_code!= SUCCESS){
-    return return_code;
-  }
-  
-  
-  // TEST ////////////////////////////////////////////////////
-  //db.print_db("test_before.txt");
-  if(0){
-  PWM_extract pwme("DATA/SEQ/YEAST/yeast_1kb.fasta");
-  cout<<"TEST UPDATE FREQ!!!!"<<ENDL;
-   for(int i = 0; i< nb_motifs; i++){   
-     pwme.update_frequencies(pwm_vec[i], 500);          
-     //cerr<<i<<ENDL;
-   }
-   for(int i = 0; i<db.size(); i++){   
-     pwme.update_frequencies(db.db[i], 500);          
-     //cerr<<i<<ENDL;
-   }
-  }
-  //db.print_db("test_after.txt");
-  /////////////////////////////////////////////////////
-  
-
-
-  //db.make_random_db(rand_size);
-  db.make_random_db();
-  cout<<"# "<<db.size()<<" PWMs loaded in database"<<ENDL;
-  
-
-  PWM_match2D match_tab;
-  int nb_best = 1; //WE WANT ONLY THE BEST
-
-  //RANDOM TEST
-  if(RANDOM_TEST){
-    cout<<"# RANDOM TEST"<<ENDL;
-    db.randomize(pwm_vec);
-  }  
-
-  db.match_all(pwm_vec, match_tab, nb_best); //MATCH ALL (keep only best)
-  db.estimate_FDR_rand_db(match_tab);  
-
-  //cout<<db.print_best_match(match_tab, E_threshold);
-  //cout<<db.print_best_match_eval(match_tab, E_threshold);
-  //cout<<db.print_best_match_FDR(match_tab, FDR_threshold);
-
-  // TO DO : PROTECT ORDER IN MATCH TABLE !!!!!!!!!!!!!!!1
-
-  //cout<<res_fdr<<ENDL; 
-  string dbfile = remove_path_and_extension(matrix_file);
-  // TO DO : PROTECT ORDER IN MATCH TABLE !!!!!!!!!!!!!!!1
-  string res_fdr = db.print_match_FDR(match_tab); //only FDR distribution  !!!!!!!!!!!! WARNING : CHANGE THE ORDER IN MATCH TABLE !!!!!!!
-  string outfile_fdr = motif_file+"."+dbfile+".fdr";//only FDR distribution
-  print_file_str(res_fdr,outfile_fdr);//only FDR distribution
-  cout<<res_fdr<<ENDL; 
-
-  string res_fdr_full = db.print_best_match_FDR(match_tab); //DETAILED
-  string outfile_fdr_full = motif_file+"."+dbfile+".fdr_full";
-  //string res_fdr_full = db.print_best_match_FDR2(match_tab,0.1); //DETAILED
- 
-  print_file_str(res_fdr_full,outfile_fdr_full);
-   cout<<res_fdr_full<<ENDL; 
-}
-
-
-//////////////////////////////////////
-
-// TO DO : PROTECT ORDER IN MATCH TABLE !!!!!!!!!!!!!!!1

File SOFT/src/main_motif_extract.cpp

-using namespace std;
-#include "read.h"
-#include "motif_extract.h"
-#include <stdio.h>  /* defines FILENAME_MAX */
-
-using namespace std;
-
-/*THIS PROGRAM IS NOT COMPLETE YET*/
-
-//TO DO : a practical tool to extract and report IUPAC occurrences 
-//For the moment, it is only used to test the Motif_extract class
-
-//Note for myself : the blast-like index maybe not the best solution for this tool... (memory and time)
-
-int main (int argc, char** argv) {
-
-  string seq_file = "DATA/SEQ/ARABI/TAIR10_upstream_1000_translation_start_20101028.fasta";
-  string motif_str = "AAAAWTTTC";  
-  char option_char;
-  
-  while ((option_char = getopt (argc, argv, "s:m:?")) != -1){
-    switch (option_char) 
-      {
-      case 's': seq_file = optarg; break;
-      case 'm': motif_str = optarg; break;
-      default: 
-	cerr<<ENDL;
-	cerr<<"Invalid option : "<<optarg<<ENDL;
-	cerr<<ENDL;
-	return INPUT_ERROR;
-      }
-  }
-
-  String1D name_vec;
-  String1D seq_vec;
-  read_fasta(seq_file, name_vec, seq_vec); 
-  int max_dist = 200000000;
-  
-  Motif_extract me(seq_vec,name_vec,6,max_dist,true,true); //TO DO: remove background dist
-
-  Occ1D ovec;
-  me.get_occ_vec(motif_str,ovec);
-  for(int i=0;i<(int)ovec.size();i++){
-    string seq_name = name_vec[ovec[i].gene];
-    int left = ovec[i].pos;
-    int right = left + ovec[i].len - 1;
-    char strand = '+';
-    int score = 1;
-    int name = i;
-    if(ovec[i].rev_strand){strand='-';}
-    cout<<seq_name<<" "<<left<<" "<<right<<" "<<"occ"<<name<<" "<<score<<" "<<strand<<ENDL;
-  }
-
-  cout<<ENDL;
-  cout<<"*THIS PROGRAM IS NOT COMPLETE YET"<<ENDL;
-  cout<<"*For the moment, it is only used to test the GO_extract class"<<ENDL;
-
-}
-
-//TO DO : a practical tool to extract and report PWM occurrences (mainly used to test the PWM class)
-/*
-
-using namespace std;
-
-#include "read.h"
-#include "motif_extract.h"
-#include "PWM.h"
-#include "PWM_db.h"
-#include <stdio.h>  //defines FILENAME_MAX
-
-using namespace std;
-
-
-int main (int argc, char** argv) {
-
-  string seq_file = "DATA/SEQ/ARABI/TAIR10_upstream_1000_translation_start_20101028.fasta";
-  string filename_db = "DATA/MOTIFS/all_jaspar.matrix";  
-  string motif_db = "";
-
-
-  String1D name_vec;
-  String1D seq_vec;
-  read_fasta(seq_file, name_vec, seq_vec);  
-  Motif_extract me(seq_vec,name_vec,6,5000,false,true); 
- 
-  PWM_db db;
-  db.read_matrices(filename_db);
-  cerr<<db.size()<< " matrices loaded!"<<ENDL;
-
-  StringDouble1D kmer_vec;
-  int nb_best = 100;
-  db.db[66].get_best_kmers(kmer_vec,nb_best);
-  int nb = kmer_vec.size();
-  Occ2D occ_table(nb);
-  for(int i = 0 ; i < nb ; i++){
-    me.get_occ_vec(kmer_vec[i].String,occ_table[i]);
-    cerr<<kmer_vec[i].String<<" "<<kmer_vec[i].Double<<" "<<occ_table[i].size()<<ENDL;
-  }
-  cerr<<nb<<ENDL;
-}
-*/