Commits

Inti Pedroso committed 7c5c078 Merge

minor bug fixed

  • Participants
  • Parent commits 7db8e24, b02f73c
  • Branches develop, feature/correction_gcv2 2
    1. feature/docs
    2. release/0.01

Comments (0)

Files changed (10)

File cnv_calling.py

   if args.sweep_parallel_at > 0.1:
     args.sweep_parallel_at = int(args.sweep_parallel_at)
 
-iCC = functions.iCC()
+import utilities
+
+iCC = utilities.cnv_caller.iCC()
 iCC.working_distn = 'negative_binomial'
 #iCC.working_distn = 'gaussian'
 # set files with data to work on 

File functions.py

-from __future__ import division
-import sys
-sys.path.append('/home/inti/My_Soft/')
-sys.path.append('/home/pedrosoi/APP/')
-import pyhsmm
-from pyhsmm.util.text import progprint_xrange
-
-import pandas as pd
-import numpy as np
-import time 
-import copy
-observed_states = ''
-genome_average_mean = ''
-log_w = ''
-states_means = ''
-
-
-def plot_cnv_profile(table,cnv_segments=None,data_mean = None):
-    from matplotlib import pylab as plt
-    # get the range of data on x and y axis
-    Xs = (table.end.values + table.start.values)/2
-    if data_mean is None:
-      data_mean = np.median(table['counts'])
-    Ys = table['counts']/data_mean
-    xmin, xmax = Xs.min(), Xs.max()
-    ymin, ymax = Ys.min(), Ys.max()
-    ymin = 0
-    plt.xlim(xmin=xmin,xmax=xmax)
-    # add horizontal grid lines
-    for cn in set([0.5/5*y_i for y_i in xrange(0, 1 + int(round(ymax+0.5))*10,5)]):
-      plt.axhline(y=cn,color='gray', linestyle='--')
-    #plot the raw count data normalize by the genome median
-    plt.plot(Xs,Ys,'g+')
-    
-    # plot the confidence interval shaded area for the posterior probability of being a CNV
-    #plt.fill_between(x=Xs,y1=table['post_prob_of_cnv_upper_ci'].values,y2=table['post_prob_of_cnv_lower_ci'].values, color='red', alpha=0.2, edgecolor='white' );
-    # plot the post-prob of being a CNV
-    plt.plot(Xs,table['post_prob_of_cnv'],'r-');
-    
-    # plot the estimaded mean anc CI for each bin
-    plt.errorbar(Xs,table['cnv_call'],yerr = [table['cnv_call_lower_ci']-table['cnv_call'], table['cnv_call']-table['cnv_call_upper_ci']  ], fmt='co');
-    if type(cnv_segments) == pd.core.frame.DataFrame:
-      plt.hlines(y=cnv_segments.genotype,colors='purple', linewidth = 2,linestyle='solid',xmin=cnv_segments.start,xmax=cnv_segments.end)
-      for row in cnv_segments.iterrows():
-	plt.fill_between(x=[row[1][3],row[1][4]], y1=row[1][9], y2=row[1][8],color='purple', alpha=0.2, edgecolor='white' )
-    ## make the plot
-    plt.show()
-
-
-def start_ipcluster(N,profile = None):
-  import os
-  if profile is None:
-    profile = ''.join(('PID',str(os.getpid())))
-  print time.ctime(),'Starting Ipython Cluster with [',N,'] engines and profile name [',profile,']'
-  cmd = ''.join(('ipcluster start --daemonize=True --n=',str(N),' --profile=',profile))
-  os.system(cmd)
-  os.system('sleep 10')
-  return profile
-
-def stop_ipcluster(profile=None):
-  import os
-  if profile is None:
-    profile = ''.join(('PID',str(os.getpid())))
-  print time.ctime(),'Stopping Ipython Cluster with profile name [',profile,']'
-  cmd = ''.join(('ipcluster stop --profile=',profile))
-  os.system(cmd)
-
-
-class iCC():
-  def __init__(self):
-    print time.ctime(), "Initializing CNV caller object"
-    # set some defauls values
-    self.split_in_gaps = False
-    self.min_gap_size = 0
-    self.max_segment_size = 5000
-    self.min_segment_size = 500 
-    self.min_group_size = 500
-    self.Nmax = 0
-    self.hyper_priors = {}
-    self.prior_dist = {}
-    self.working_distn = None
-    self.MCMCopts = dict(niter = 100, burnin = 50, sample_freq = 5)
-    self.sampled_models = {}
-    self.changepoint_counts = {}
-    self.observed_states_counts = {}
-    self.model_opts = dict(alphakappa_a_0=50.,
-			    alphakappa_b_0=1.,
-			    rho_a_0=5000.,
-			    rho_b_0=1., # higher rho_a_0 means stickier
-			    gamma_a_0=1.,
-			    gamma_b_0=3.,# higher rho_a_0 means stickier, rhos is the self-transition fraction and is distributed as a Beta
-			    init_state_concentration=50.
-			   )
-    self.model = {}
-    
-  def _restart(self):
-    self.read_data()
-    self.set_hyperpriors()
-    self.define_prior_distns()
-    self.set_model()
-    self.load_data_into_model()
-
-    
-  def set_model(self):
-    import pyhsmm
-    self.model[self.working_distn] =   pyhsmm.models.StickyHMMEigen(
-						      obs_distns     = self.prior_dist[self.working_distn],
-						      alphakappa_a_0=50.,alphakappa_b_0=1.,
-						      rho_a_0=5000.,rho_b_0=1., # higher rho_a_0 means stickier
-						      gamma_a_0=1.,gamma_b_0=3.,# higher rho_a_0 means stickier, rhos is the self-transition fraction and is distributed as a Beta
-						      init_state_concentration=50.)
-						      #alphakappa_a_0 = self.model_opts['alphakappa_a_0'],
-						      #alphakappa_b_0 = self.model_opts['alphakappa_b_0'],
-						      #rho_a_0        = self.model_opts['rho_a_0'],
-						      #rho_b_0        = self.model_opts['rho_b_0'], # higher rho_a_0 means stickier
-						      #gamma_a_0      = self.model_opts['gamma_a_0'],
-						      #gamma_b_0      = self.model_opts['gamma_b_0'],# higher rho_a_0 means stickier, rhos is the self-transition fraction and is distributed as a Beta
-						      #init_state_concentration = 50.)#self.model_opts['init_state_concentration']# this implies a tight prior on gamma controlling the stick-breaking process. The stick-breaking process is a Beta(1,gamma)
-					
-  
-  def load_data_into_model(self,transform = None):
-    print time.ctime(), 'Loading data'
-    for i in self.data.keys():
-      data = self.data[i]
-      if transform is not None:
-	data = transform(data)
-      self.model[self.working_distn].add_data(data)
-    print time.ctime(), '   ... done'
-    
-  def sample(self,save = False,niter = None, freq = None, parallel=False,numtoresample='all',all_at = 0.1, verbose = False):
-    number_of_interations = self.MCMCopts['niter']
-    save_freq =  self.MCMCopts['sample_freq']
-    if niter is not None:
-      number_of_interations = niter
-    if freq is not None:
-      save_freq = freq
-     
-    for itr in progprint_xrange(number_of_interations):
-      if parallel:
-	n_to_resample = numtoresample
-	if (itr % all_at) == 0:
-	  n_to_resample = 'all'
-	self.model[self.working_distn].resample_model_parallel2(numtoresample=n_to_resample)
-      else:
-	self.model[self.working_distn].resample_model()
-      if save:
-	if self.working_distn not in self.sampled_models.keys():
-	  self.sampled_models[self.working_distn] = {}
-	if (itr % save_freq) == 0:
-	  self.sampled_models[self.working_distn][itr] = copy.deepcopy(self.model[self.working_distn])
-      if verbose:
-	m = self.model[self.working_distn] 
-	print itr, m.trans_distn.alpha, m.trans_distn.gamma, m.trans_distn.rho, m.trans_distn.kappa, m.trans_distn.beta.max()
-
-  def _define_data_storage(self):
-    self.changepoint_counts[self.working_distn] = {}
-    self.observed_states_counts[self.working_distn] = {}
-    for idx, s_list in enumerate(self.model[self.working_distn].states_list):
-      self.changepoint_counts[self.working_distn][idx] = np.zeros_like(s_list.data)
-      # observed states at sequences
-      self.observed_states_counts[self.working_distn][idx] = np.zeros((self.Nmax,len(s_list.data)))
-  
-  def _update_changepoint_and_observed_states_counts(self):
-    for m in self.sampled_models[self.working_distn].values():
-      for idx, s_list in enumerate(m.states_list):
-	self.changepoint_counts[self.working_distn][idx] += np.concatenate(( (0,), np.diff(s_list.stateseq) != 0 ))
-	for i, j in enumerate(s_list.stateseq):
-	  self.observed_states_counts[self.working_distn][idx][j,i] += 1
-
-  def _normalize_counts(self):
-    Nmodels = len(self.sampled_models[self.working_distn])
-    for idx in self.observed_states_counts[self.working_distn].keys():
-	for row_idx in xrange(self.observed_states_counts[self.working_distn][idx].shape[0]):
-	  self.observed_states_counts[self.working_distn][idx][row_idx] /= Nmodels
-	self.changepoint_counts[self.working_distn][idx] /= Nmodels
-	
-  def _calculate_average_states_mean_var(self):
-    self.states_moments = {}
-    self.states_params = {}
-    states_means = ''
-    states_var = ''
-    if self.working_distn == 'negative_binomial':
-      Rs = np.zeros(self.Nmax)
-      Ps = np.zeros(self.Nmax)
-      for m in self.sampled_models[self.working_distn].values():
-	for i,distn in enumerate(m.obs_distns):
-	  Rs[i] += distn.r
-	  Ps[i] += distn.p
-      Nmodels = len(self.sampled_models[self.working_distn])
-      Rs /= Nmodels
-      Ps /= Nmodels
-      
-      states_means = [ Rs[i]*Ps[i]/(1-Ps[i]) for i in xrange(self.Nmax) ]
-      states_var   = [ Rs[i]*Ps[i]/(1-Ps[i])**2 for i in xrange(self.Nmax) ]
-      self.states_params[self.working_distn] = {}
-      self.states_params[self.working_distn]['Rs'] = Rs
-      self.states_params[self.working_distn]['Ps'] = Ps
-
-    elif self.working_distn == 'gaussian':
-      MUs = np.zeros(self.Nmax)
-      SIGMASQs = np.zeros(self.Nmax)
-      for m in self.sampled_models[self.working_distn].values():
-	for i,distn in enumerate(m.obs_distns):
-	  MUs[i] += distn.mu
-	  SIGMASQs[i] += distn.sigmasq
-      Nmodels = len(self.sampled_models[self.working_distn])
-      MUs /= Nmodels
-      SIGMASQs /= Nmodels
-      self.states_params[self.working_distn] = {}
-      self.states_params[self.working_distn]['mu'] = MUs
-      self.states_params[self.working_distn]['sigmasq'] = SIGMASQs
-      states_means = MUs 
-      states_var = SIGMASQs
-    else:
-      NotImplementedError
-    
-    states_means = np.hstack(states_means)
-    states_var = np.hstack(states_var)
-    self.states_moments[self.working_distn] = np.zeros((self.Nmax,4))
-    self.states_moments[self.working_distn][:,0] = states_means
-    self.states_moments[self.working_distn][:,1] = states_var
-    self.states_moments[self.working_distn][:,2] = states_means + 1.96*np.sqrt(states_var)
-    self.states_moments[self.working_distn][:,3] = states_means - 1.96*np.sqrt(states_var)
-    
-  
-  def _data_median(self):
-    values = pyhsmm.util.stats.flattendata([l.data for l in self.model[self.working_distn].states_list ])
-    weights = pyhsmm.util.stats.flattendata([self.data_files[idx].post_prob_of_cnv.values for idx in self.data_files.keys()])
-    weights -= 1
-    self.data_mean, self.data_variance = self.weighted_avg_and_std(values, weights=weights)
-    #self.data_mean,self.data_variance = self._fixed_effect_combined_stat(self.states_moments[self.working_distn][:,0],self.states_moments[self.working_distn][:,1],np.exp(self.states_log_prob_of_CN1))
-    self._data_lower_ci = self.data_mean - 1.96*np.sqrt(self.data_variance)
-    self._data_upper_ci = self.data_mean + 1.96*np.sqrt(self.data_variance)
-    
-
-  def _states_log_prob_of_CN1(self):
-    Nmodels = len(self.sampled_models[self.working_distn])
-    log_w = np.vstack([ np.apply_along_axis(np.sum,1,self.observed_states_counts[self.working_distn][idx]*Nmodels) for idx in self.observed_states_counts[self.working_distn].keys()  ])
-    if len(self.observed_states_counts[self.working_distn].keys()) > 1:
-      log_w = np.apply_along_axis(np.sum,0,log_w)      
-    else:
-      log_w = log_w[0]
-    log_w = np.log(log_w)
-    log_w -= np.logaddexp.reduce(log_w)
-    self.states_log_prob_of_CN1 = log_w
-
-  def _post_prob_of_cnv(self):
-    print time.ctime(),"   Calculating prob of CNV" 
-    for idx in self.observed_states_counts[self.working_distn].keys():
-      self.data_files[idx]['post_prob_of_cnv'] = np.hstack([ np.dot(self.observed_states_counts[self.working_distn][idx][:,row_idx],self.observed_states_counts[self.working_distn][idx][:,row_idx]) - np.dot(self.observed_states_counts[self.working_distn][idx][:,row_idx], np.exp(self.states_log_prob_of_CN1)) for row_idx in xrange(len(self.model[self.working_distn].states_list[idx].data)) ])
-      self.data_files[idx]['post_prob_of_cnv'][ self.data_files[idx]['post_prob_of_cnv'] < 0 ] = 0
-      self.data_files[idx]['post_prob_of_cnv'][ self.data_files[idx]['post_prob_of_cnv'] > 1 ] = 1
-	
-  def _mean_ci_for_cnv(self):
-    for idx in self.observed_states_counts[self.working_distn].keys():
-	print time.ctime(),"   Working on dataset [", idx+1,"] of [",self.Ndata_files,"]" 
-	self.data_files[idx]['cp_count'] = self.changepoint_counts[self.working_distn][idx]
-	for col in ['cnv_call','cnv_call_lower_ci','cnv_call_upper_ci' ]:
-	  self.data_files[idx][col] = np.nan
-	for row_idx in xrange(self.observed_states_counts[self.working_distn][idx].shape[1]):
-	  # calculate meand and sd of CNVs 
-	  m,v = self._fixed_effect_combined_stat(self.states_moments[self.working_distn][:,0],self.states_moments[self.working_distn][:,1],self.observed_states_counts[self.working_distn][idx][:,row_idx])
-	  m_lci = m - 1.96*np.sqrt(v)
-	  m_uci = m + 1.96*np.sqrt(v)
-	  # store the cnv params
-	  self.data_files[idx]['cnv_call'][row_idx] = np.double(m)/np.double(self.data_mean)
-	  self.data_files[idx]['cnv_call_lower_ci'][row_idx] = m_lci/self.data_mean # self._data_lower_ci 
-	  self.data_files[idx]['cnv_call_upper_ci'][row_idx] = m_uci/self.data_mean # self._data_upper_ci
-  
-  def _summarize_cnv_regions(self,x,idx):
-    back = np.zeros(9)
-    # chromosome
-    back[0] = x.ix[x.irow(0).name,'chr']
-    # start
-    back[1] = x.ix[x.irow(0).name,'start']
-    # end
-    back[2] = x.ix[x.irow(len(x)-1).name,'end']
-    # n_data_points
-    back[3] = len(x)
-    # size  
-    back[4] = back[2] - back[1]
-    # meand and var
-    w = np.apply_along_axis( np.sum,1,self.observed_states_counts[self.working_distn][idx][:,x.index] )
-    w /= w.sum()
-    cn_m, cn_v = self._fixed_effect_combined_stat(self.states_moments[self.working_distn][:,0],self.states_moments[self.working_distn][:,1],w)
-    back[5] = cn_m/self.data_mean
-    back[6] = (cn_m - 1.96*np.sqrt(cn_v))/self._data_lower_ci
-    back[7] = (cn_m + 1.96*np.sqrt(cn_v))/self._data_upper_ci
-    cnv_post_prob = np.dot(w,w) - np.dot(np.exp(self.states_log_prob_of_CN1),w) #1 - np.exp(np.apply_along_axis( np.logaddexp.reduce,1,self.states_log_prob_of_CN1 + np.log(w) ) )
-    back[8] = cnv_post_prob
-    return pd.DataFrame(back).T
-
-  def _find_segment_positions(self,prob=0.9, min_size=0,max_size=np.inf):
-    calls = {}
-    for file_idx in self.data_files.keys():
-      f = self.data_files[file_idx]
-      cnv_regions = f[f.post_prob_of_cnv > prob]
-      step = np.diff(cnv_regions.start.values)
-      normal_step = np.median(step)
-      cnv_regions['step'] = 0
-      cnv_regions['step'][1:] = step
-      ids = 0
-      cnv_regions['cnv_id'] = 0
-      for i in xrange(len(cnv_regions)):
-	true_row_id = cnv_regions.irow(i).name 
-	if cnv_regions.ix[true_row_id,'step'] > normal_step:
-	  ids +=1
-	cnv_regions.ix[true_row_id,'cnv_id'] = ids
-
-      cnv_regions = cnv_regions.groupby('cnv_id').apply(lambda x: self._summarize_cnv_regions(x,file_idx))
-      cnv_regions.columns = ['chromosome','start','end','n_dat_point','size','mean_cnv','lower_ci_cnv','upper_ci_cnv','cnv_post_prob']
-      cnv_regions['genotype'] = np.round(cnv_regions['mean_cnv']/0.5)*0.5
-      # filter
-      cnv_regions = cnv_regions[cnv_regions.size > min_size]
-      cnv_regions = cnv_regions[cnv_regions.size < max_size]
-      calls[file_idx] = cnv_regions.reset_index()
-    self.cnv_call = {}
-    self.cnv_call[self.working_distn] = calls
-  
-  def process_samples(self):
-    print time.ctime(),"Processing samples from the posterior"
-    self._define_data_storage()
-    # count number of times at each data point there is a change in state
-    #self._update_changepoint_counts()
-    #self._update_observed_states_counts()
-    self._update_changepoint_and_observed_states_counts()
-    # Normalize the counts to be frequencies.
-    self._normalize_counts()
-    # calculate the prob for each post-sample of a state representing copy number 1
-    self._states_log_prob_of_CN1()
-    # calculate prob of cnv at each data point
-    self._post_prob_of_cnv()
-    # calculate the mean and var of the states
-    self._calculate_average_states_mean_var()
-    # calculate the median of the data
-    if not hasattr(self,'data_mean'):
-      self._data_median()
-    print time.ctime(),"Summarising CNV calls"
-    self._mean_ci_for_cnv()
-    print time.ctime(), "Calling CNVs"
-    self._find_segment_positions()
-    print time.ctime(), "   ... done"
-
-
-  def focus_on_cnvs(self,prob = 0.5,flank = 100, burnin=0,niter=0,freq=0):
-    self.process_samples()
-    self.sampled_models = {}
-    self.observed_states_counts = {}
-    self.changepoint_counts = {}
-    if not hasattr(self ,'_original_data'):
-      self._original_data = {}
-      self._last_states_list = []
-      self._cnv_regions = {}
-      for idx in self.data_files.keys():
-	self._original_data[idx] = self.data_files[idx].copy()
-      self._last_states_list = copy.deepcopy(self.model[self.working_distn].states_list)
-	
-    for idx in self.data_files.keys():
-      # replace the original data file table to be able to call cnvs next time without problems
-      indexes = self.data_files[idx][self.data_files[idx].post_prob_of_cnv > prob].index.values
-      indexes = np.unique(np.ravel([[range(i,i+flank),range(i-flank,i)] for i in indexes ]))
-      indexes = indexes[indexes >= 0]
-      indexes = indexes[indexes <= np.max(self.data_files[idx].index.values)]
-      self.data_files[idx] = self.data_files[idx].ix[indexes,:]
-      # store the data index for the cnv regions
-      self._cnv_regions[idx] = self.data_files[idx].index.values
-      # update the data stored on the model 
-      self.model[self.working_distn].states_list[idx].stateseq = self.model[self.working_distn].states_list[idx].stateseq[ self._cnv_regions[idx] ]
-      self.model[self.working_distn].states_list[idx].data = self.model[self.working_distn].states_list[idx].data[ self._cnv_regions[idx] ]
-      # reset index of table
-      if hasattr(self.data_files[idx] ,'level_0'):
-	self.data_files[idx] = self.data_files[idx].drop(['level_0'],1)
-      self.data_files[idx] = self.data_files[idx].reset_index()
-      
-    if burnin > 0:
-      self.sample(niter = burnin,save=False)
-    if niter > 0:
-      self.sample(niter = niter,save=True, freq = freq)
-      self.process_samples()
-
-  def unfocus_on_cnvs(self,burnin=0,niter=0,freq=0):
-    # save the state seq we have now
-    focused_state_list = copy.deepcopy(self.model[self.working_distn].states_list)
-    # recover the last states sequence
-    self.model[self.working_distn].states_list = copy.deepcopy(self._last_states_list)
-    # empy this object
-    self._last_states_list = []
-    # overwritte the states values for the data points were we have been working on
-    for idx in xrange(len(self.model[self.working_distn].states_list)):
-      self.model[self.working_distn].states_list[idx].stateseq[ self._cnv_regions[idx] ] = focused_state_list[idx].stateseq
-
-    # recover the original data
-    self.data_files = self._original_data
-    self._original_data = {}
-    
-    # if desire run some sampling to update the genome wide estimates
-    if burnin > 0:
-      self.sample(niter = burnin,save=False)
-    if niter > 0:
-      self.sampled_models = {}
-      self.sample(niter = niter,save=True, freq = freq)
-      self.process_samples()
-
-    
-  def _sample_nbinomial(r,p,size=1):
-    return np.random.poisson(np.random.gamma(r,p/(1-p),size=size))
-
-    
-  def set_files(self,files):
-    self.files = files
-  
-  def define_prior_distns(self,distn = None):
-    import pyhsmm
-    if distn is not None:
-      self.working_distn = distn
-    if self.working_distn == 'negative_binomial':
-      self.prior_dist[self.working_distn] = [pyhsmm.distributions.NegativeBinomial(**self.hyper_priors[self.working_distn]) for idx in range(self.Nmax)]
-    elif self.working_distn == 'gaussian':
-      self.prior_dist[self.working_distn] = [pyhsmm.distributions.ScalarGaussianNonconjNIX(**self.hyper_priors[self.working_distn]) for idx in range(self.Nmax)]
-    else:
-      print "Distribution [",distn,"] not implemented"
-    
-
-  def set_Nmax(self,Nmax = 0):
-    if Nmax == 0:
-      alldata = np.hstack([ self.data[i] for i in self.data.keys()])
-      self.Nmax = int(2*(round(alldata.max()/np.mean(alldata)/0.5) + 1))
-      # put a limit on the maximun number of states
-      if self.Nmax > 100:
-	  self.Nmax = 100
-    else:
-      self.Nmax = Nmax
-
-    print time.ctime(), "Using weak limit approximation with a max number of states of [",self.Nmax,"]"
-
-
-  def set_hyperpriors(self,distn = None,tolerance = 0.4):
-    import pyhsmm
-    import pyhsmm.basic.distributions as distributions
-    import scipy as sp
-    import scipy.stats as ss
-    import random
-    if distn is not None:
-      self.working_distn = distn
-
-    print time.ctime(), "Getting hyper-prior estimates from the data"
-    alldata = np.hstack([ self.data[i] for i in self.data.keys()])
-    ratio = alldata[alldata > 0]/np.median(alldata[alldata > 0])
-    alldata = alldata[ratio > 0.5 + tolerance ]
-    ratio = alldata[alldata > 0]/np.median(alldata[alldata > 0])
-    alldata = alldata[ratio < 1.5 - tolerance]
-    if self.working_distn == 'negative_binomial':
-      Rs = []
-      Ps = []
-      nb = distributions.NegativeBinomial(5.,20.,1.,1.)
-      small_data = np.array(random.sample(alldata,1000))# np.random.poisson(np.median(alldata),100) 
-      # short burnin
-      for i in xrange(1000):
-	  nb.resample(small_data)
-
-      for i in xrange(100):
-	  nb.resample(small_data)
-	  Rs.append(nb.r)
-	  Ps.append(nb.p)
-      fit_gamma_alpha,fit_gamma_loc,fit_gamma_beta=ss.gamma.fit(Rs,loc=0,scale=1)
-      fit_beta_a, fit_beta_b, fit_beta_loc, fit_beta_scale = ss.beta.fit(Ps,loc=0,scale=1)
-      self.hyper_priors[self.working_distn] = { 'k_0': fit_gamma_alpha, 'theta_0': fit_gamma_beta , 'alpha_0': fit_beta_a, 'beta_0': fit_beta_b }
-    elif self.working_distn == 'gaussian':
-      log_data = np.log(alldata[alldata > 0])
-      del(alldata)
-      means_prior_mean = np.mean(log_data)
-      means_prior_sd = np.max(log_data) - 0
-      self.hyper_priors[self.working_distn] = dict(
-                      mu_0=means_prior_mean, tausq_0=(1/(means_prior_sd**2)), # mean and variance for the prior for means
-                      sigmasq_0=1, nu_0=1) # mean and number of pseudo-observations for variances
-                                                   # setting a large sigmasq_0 and increasing
-                                                   # nu_0 will encourage less segmentation
-      
-    print time.ctime(), "   ... done"
-
-  def read_data(self):
-    datatospread = {}
-    counter = 0
-    print time.ctime(), len(self.files), " files to load"
-    data_files = {}
-    for file in self.files:
-      table = pd.read_table(file,names=['chr','start','end','counts'],sep="\t",na_values=['inf','-inf'])
-      table['counts'] = table['counts'].astype(float)
-      # FILTER DATA
-      # TODO: this set NaN to 0 needs to change later on!!!
-      table = table.fillna(0)
-      # remove infinity values
-      table = table[table['counts'] != np.inf]
-      table = table[table['counts'] != -np.inf]
-      # rm rows with counts == 0
-      table = table[table['counts'].isnull() == False]
-      # rm values < 0. This may need to change if using some other type of values
-      table = table[table['counts'] >=0 ]
-      # remove values way off the mean?
-      #table = table[table['counts'] < 1000*np.median(table['counts'][table['counts'] > 0] )]
-      if self.split_in_gaps:
-	steps = np.diff(table['start'].values)
-	normal_step = np.median(steps)
-	if self.min_gap_size > 0:
-	  normal_step = self.min_gap_size
-	group_size = 0
-	labels = np.zeros_like(table['counts'].values)
-	for i,s in enumerate(steps):
-	  if s > normal_step:
-	    if group_size > self.min_group_size:
-	      counter +=1
-	      group_size = 0
-	    else:
-	      group_size += 1
-	  else:
-	    group_size +=1
-	  labels[i] = counter
-	# assign last data point to last cluster
-	labels[-1] = counter
-	# assign data to dict to load in data model later on
-	table['group_label'] = labels
-	if group_size < self.min_group_size:
-	  table['group_label'][table['group_label'] == counter] = counter-1
-	  counter -= 1
-      else:
-	table['group_label'] = counter
-	
-      table_grouped = table.groupby('group_label')
-
-      groups_idx = table_grouped.indices
-      for idx in groups_idx:
-	idx = int(idx)
-	datatospread[idx] = table['counts'][ groups_idx[idx] ].values.astype(int)
-	data_files[idx] = table_grouped.get_group( idx ).reset_index()
-      # increase counter to start new files on new data sequence
-      counter += 1
-
-    self.data = datatospread
-    self.data_files = data_files    
-    self.Ndata_files = len(self.data)
-    self.rename_chromosomes()
-    print time.ctime(), "   data loaded into [",len(self.data),"] data sequences"
-    print time.ctime(), "   ... done"
-
-  def rename_chromosomes(self):
-    alt_name_dict = {'X' : 23,'Y' : 24,'XY':25,'MT':26,'20':202}
-    for idx in self.data_files.keys():
-      for alt_name in alt_name_dict.keys():
-	self.data_files[idx].chr.values[self.data_files[idx].chr.values.astype(str) == alt_name] = alt_name_dict[alt_name]
-
-
-  def make_bed(self,filename):
-    calls = pd.concat(self.cnv_call[self.working_distn].values())
-    calls = calls.sort(['chromosome','start'])
-    cols_to_print = ['chromosome','start','end','n_dat_point','size','mean_cnv','lower_ci_cnv','upper_ci_cnv','cnv_post_prob','cnv_post_prob_lower_ci','cnv_post_prob_upper_ci','genotype']
-    calls.ix[:,cols_to_print].to_csv(filename, header=True,index=None,sep="\t")
-
-  def print_summary(self,filename,how='all'):
-    data = pd.concat(self.data_files.values())
-    if how == 'all':
-      out = ''.join((filename,'.cnv_calls_summary.txt'))
-      data.to_csv(out,header=True,index=None,sep="\t")
-    elif how == 'by_chr':
-      index = data.groupby('chr').indices
-      for chr_id in index.keys():
-	out = ''.join((filename,'.cnv_calls_summary.',str(chr_id),'.txt'))
-	data.ix[index[chr_id],:].to_csv(out,header=True,index=None,sep="\t")
-
-  
-
-  def find_segment_positions(data_files, prob=0.9, min_size=0,max_size=np.inf, observed_states = observed_states, genome_average = genome_average_mean,log_w=log_w,states_means=states_means):
-    calls = {}
-    for file_idx in data_files.keys():
-      f = data_files[file_idx]
-      cnv_regions = f[f.post_prob_of_cnv > prob]
-      step = np.diff(cnv_regions.start.values)
-      normal_step = np.median(step)
-      cnv_regions['step'] = 0
-      cnv_regions['step'][1:] = step
-      ids = 0
-      cnv_regions['cnv_id'] = 0
-      for i in xrange(len(cnv_regions)):
-	true_row_id = cnv_regions.irow(i).name 
-	if cnv_regions.ix[true_row_id,'step'] > normal_step:
-	  ids +=1
-	cnv_regions.ix[true_row_id,'cnv_id'] = ids
-
-      cnv_regions = cnv_regions.groupby('cnv_id').apply(lambda x: summarize_cnv_regions(x,observed_states[file_idx],genome_average = genome_average_mean,log_w=log_w,states_means=states_means))
-      cnv_regions.columns = ['chromosome','start','end','n_dat_point','size','mean_cnv','lower_ci_cnv','upper_ci_cnv','cnv_post_prob','cnv_post_prob_lower_ci','cnv_post_prob_upper_ci']
-      cnv_regions['genotype'] = np.round(cnv_regions['mean_cnv']/0.5)*0.5
-      # filter
-      cnv_regions = cnv_regions[cnv_regions.size > min_size]
-      cnv_regions = cnv_regions[cnv_regions.size < max_size]
-      calls[file_idx] = cnv_regions.reset_index()
-    return calls
-
-      
-  def _changepoint_indicators_from_stateseq(stateseq):
-      '''
-      returns an binary array "ret" of same size as stateseq
-      where ret[i]=1 iff there's a changepoint between times i-1 and i
-      '''
-      return np.concatenate(( (0,), np.diff(stateseq) != 0 ))
-
-  def approx_k_0(x):
-    ''' Will make the asumption that p = 0.5, corresponding to a flat prior on p, i.e., Beta(1,1).
-	and that theta_0 = k_0/2
-    '''
-    return np.sqrt(mean_counts*2) + 0.001
-
-
-  def bma(mu, sd, pr_M):
-    average = np.average(mu, weights=pr_M)
-    var_avg = -1*average**2
-    for k in xrange(len(pr_M)):
-      var_avg += (sd[k] + mu[k]**2)*pr_M[k]
-    return average, var_avg
-
-  def weighted_avg_and_std(self,values, weights):
-      """
-	  Returns the weighted average and standard deviation.
-	  
-	  values, weights -- Numpy ndarrays with the same shape.
-	  """
-      average = np.average(values, weights=weights)
-      variance = np.dot(weights, (values-average)**2)/weights.sum()  # Fast and numerically precise
-      return average, variance
-    
-  def _fixed_effect_combined_stat(self,mu,sigma,w):
-    b_fixed = np.dot(mu,w)/w.sum()
-    v_fixed = np.dot(w*w,sigma)
-    return b_fixed, v_fixed
-    
-    
-def unique_rows2(A):
-  import pandas as pd
-  return pd.DataFrame(A.T).drop_duplicates().T.values
-   
-def ratio_of_gaussians(mu_z,sigma_z,mu_w,sigma_w,rho):
-  s = rho * sigma_z/sigma_w
-  h = sigma_z *((1 - rho*rho)**0.5)
-  r = sigma_w/h
-  a =  (mu_z - s*mu_w)/h  #(mu_z/sigma_z - rho*(mu_w/sigma_w))/(1-rho*rho)**0.5
-  b = mu_w/sigma_w
-  if a < 0:
-    a = -a
-    r = -r
-  r_mu = a/(1.01*b - 0.2713)
-  r_sigma = (a**2 + 1)/(b**2 + 0.108*b  - 3.795) - r_mu**2 
-  r_mu = s+r_mu/r
-  r_sigma = np.sqrt(r_sigma)/abs(r)
-  return r_mu, r_sigma
-
-
-def g_t(t,mu_z,sig_z,mu_w,sig_w,rho):
-  v=np.sqrt(sig_z*sig_z-2*t*sig_z*sig_w*rho+t*t*sig_w*sig_w)
-  w=(t*mu_w-mu_z)/v
-  v=(sig_z*(mu_w*sig_z-mu_z*rho*sig_w)+t*sig_w*(mu_z*sig_w-sig_z*rho*mu_w))/(v*v*v)
-  return .39894228*exp(-.5*w*w)*v
-
-def marsaglia():
-  muz=30.5;muw=32;sigz=5;sigw=4;rho=.8;
-  s=rho*sigz/sigw;
-  r=sigw/(sigz*sqrt(1-rho*rho));
-  b=muw/sigw;
-  a=(muz/sigz-rho*muw/sigw)/sqrt(1-rho*rho);
-  if(a<0):
-    a=-a;
-    r=-r;
-  print 'a=',a,';b=',b, ';s=',s,';r=',r
-  appmu=a/(1.01*b-.2713);
-  appsig=np.sqrt((a*a+1)/(b*b+.108*b-3.795)-appmu*appmu);
-  return appmu, appsig
-
-
-
-

File normalise_counts.py

+#import numpy as np
+#import os
+#import pandas as pd
+#import pybedtools
+#from Bio import SeqIO
+#import sys
+
+
+import argparse
+#import functions
+
 import numpy as np
-import os
 import pandas as pd
-import pybedtools
-from Bio import SeqIO
-import sys
+import pysam
+import time
 
-def mean_from_wig(chr_name,start,end):
-  return np.median([ data[2] for data in bw.get(chr_name,int(start), int(end))  ])
+print time.ctime(), "Started"
 
-def normalise_by_gc(table):
-  table['coverage'] + 1
-  median_all_gc = np.median(table.ix[table['coverage']>0,'coverage'])
-  table['int_pct_gc'] = (bins_plus_nucl_comp['pct_gc']*100).astype(int)
-  median_by_gc = table.groupby('int_pct_gc').apply(lambda x: np.median(x['coverage']))
-  table['coverage_gc'] = table.apply(lambda x: x['coverage']*(median_all_gc/median_by_gc[x['int_pct_gc']]),axis=1)
-
-  #table['mean_mappability'] = table.apply(  lambda x:  mean_from_wig(x['chr'], x['start'],x['end']),  axis=1)
-  #table = table.fillna(0)
-  #table['mean_mappability'] = (table['mean_mappability']*100).astype(int)
-  #median_all_mappability = np.median(table.ix[table['coverage_gc']>0,'coverage_gc'])
-  #median_by_mappability = table.groupby('mean_mappability').apply(lambda x: np.median(x['coverage_gc']))
-  #table['coverage_gc_mappability'] = table.apply(lambda x: x['coverage_gc']*(median_all_mappability/median_by_mappability[x['mean_mappability']]),axis=1)
-  table['normalised'] = table['coverage_gc']
-  #table = table.fillna(0)
-  return table
-
-def read_cov_table(file):
-  table = pd.read_table(file, names=['chr','start','end','coverage','cov_bp','region_length','frac_bases_covered'])
-  #table['bin_mean_pos'] = (table['start'] + table['end'])/2
-  table['log_cov'] = np.log10(table['coverage'] + 1)
-  #table = table.reindex_axis(table.ix[['chr','start','end']],axis=1)
-  return table
+parser = argparse.ArgumentParser(description='Call Copy Number variants using a Sticky-Hidden Markov Model')
+
+# Input Files
+parser.add_argument('--bam',required=True)
+parser.add_argument('--bed',default=0)
+
+#Output files
+parser.add_argument('--output', required=True)
+
+#Options
+parser.add_argument('--chr', required=False)
+parser.add_argument('--ref', default = 0)
+parser.add_argument('--ref_folder',default = 0)
+parser.add_argument('--win_size',type=int,default=1000)
+args = parser.parse_args()
 
 def make_bins(chr,max_value,size):
   end = max_value - 1
   return b
 
 
-genome_fasta = "human_g1k_v37.fasta"
-win_size = int(sys.argv[2])
-#print 'Retrieving chromosome size from bam file'
-import pysam
-f = pysam.Samfile(sys.argv[1],'rb')
-chr_sizes = {}
-for seq in f.header['SQ']:
-    #chr_sizes[seq['SN'].strip('chr')] = seq['LN']   
-    chr_sizes[seq['SN']] = seq['LN']
-f.close()
-
-
-tmp = {}
-tmp[sys.argv[3]] = chr_sizes[sys.argv[3]]
-chr_sizes = tmp
-#handle = open(genome_fasta, "rU")
-#for record in SeqIO.parse(handle, "fasta") :
-#    chr_sizes[ record.id ] = len(record.seq) 
-#handle.close()
-
-# lets make some bins
-print "Building bins"
-bins = pd.concat([ make_bins(chr,chr_sizes[chr], win_size) for chr in chr_sizes ])
-#bins['chr'] = bins['chr'].str.strip('chr')
-bins = pybedtools.BedTool(bins.to_string(index=None,header=False),from_string=True)
-bins_bed_file = ''.join((genome_fasta.strip('fasta'),str(np.int(win_size/1000)),"kb",".bed"))
-#bins.saveas(bins_bed_file) 
-#calculate coverage
-bam = sys.argv[1] #'/home/inti/APP/forestSV/inst/scripts/NA12878.chrom20.ILLUMINA.bwa.CEU.high_coverage.20100311.bam'
-
-# name output files
-kb_string = ''.join((str(np.int(win_size/1000)),"kb"))
-out = sys.argv[1].strip('bam')
-out = ''.join((out,kb_string,'.',sys.argv[3]))
-
-name_bins_coverage = ''.join((out,".coverage.bed"))
-name_bins_nuc_comp = ''.join((out,".nclcomposition.bed"))
-name_normalised = ''.join((out,".normalized.bed"))
-
-bam = pybedtools.BedTool(bam)
-print "Calculating coverage for each bin"
-
-bins_coverage = bam.coverage(bins).saveas( name_bins_coverage )
-# calculate the gc content and other nucleodite composition features of the bins
-print "Calculating nucleotide content for each bin"
-#withchr = '/home/inti/RESOURCES/fasta/human_g1k_v37.fasta'
-nochr='human_g1k_v37.fasta'
-bins_nuc_comp = bins_coverage.nucleotide_content(nochr).saveas(name_bins_nuc_comp) 
-
-#from bx.bbi.bigwig_file import BigWigFile
-#bw = BigWigFile( open("wgEncodeCrgMapabilityAlign100mer.bigWig") )
-
-bins_plus_nucl_comp = pd.read_table(name_bins_nuc_comp,names=['chr','start','end','coverage','cov_bp','region_length','frac_bases_covered','pct_at','pct_gc','num_A','num_C','num_G','num_T','num_N','num_oth','seq_len'])
-bins_plus_nucl_comp = bins_plus_nucl_comp[bins_plus_nucl_comp['num_N'] == 0]
-print "Normalizing by GC content"
-bins_plus_nucl_comp = normalise_by_gc(bins_plus_nucl_comp)
-bins_plus_nucl_comp = bins_plus_nucl_comp.sort(['chr', 'start'], ascending=[1, 1])
-bins_plus_nucl_comp[['chr','start','end','normalised']].fillna(0).to_csv(name_normalised,header=None,index=None,sep="\t")
-print "Done"
+def calculate_coverage(pysam_obj,bed):
+  return bed.apply( lambda x: pysam_obj.count(reference=str(int(x['chr'])),start=int(x['start']),end=int(x['end']))   ,axis=1  )
+
+def calculate_gc_content(pysam_fasta_obj,bed):
+  return bed.apply( lambda x: gc_content(ref.fetch(reference=str(int(x['chr'])),start=int(x['start']),end=int(x['end']) )) ,axis=1  )
 
+def gc_content(seq):
+  return np.sum([seq.count(letter) for letter in ['G','C'] ])/float(len(seq))
+
+def normalise_by_gc(table):
+  table['coverage'] + 1
+  median_all_gc = np.median(table.ix[table['coverage']>0,'coverage']).astype(int)
+  table['gc_intervals'] = (table['gc']*100).astype(int)
+  median_by_gc = table.groupby('gc_intervals').apply(lambda x: np.median(x['coverage']))
+  table['normalised'] = table.coverage.values * median_all_gc/median_by_gc[table['gc_intervals']].values
+  table = table.fillna(0)
+  table.ix[np.where((table['normalised'] == np.inf) == True)[0],'normalised'] = 0.0
+  return table['normalised']
+
+print time.ctime(), "Reading bam file"
+
+bam = pysam.Samfile(str(args.bam)) #('NA12878/NA12878.mapped.ILLUMINA.bwa.CEU.low_coverage.20120522.bam')
+ref_filename = bam.header['SQ'][0]['UR'].split('/')[-1].split(' ')[0]
+
+
+if args.bed == 0:
+  chr_sizes = {}
+  for seq in bam.header['SQ']:
+      chr_sizes[seq['SN']] = seq['LN']
+  tmp = {}
+  tmp[str(args.chr)] = chr_sizes[str(args.chr)]
+  chr_sizes = tmp
+  print time.ctime(), "Building bins"
+  bed = pd.concat([ make_bins(chr,chr_sizes[chr], args.win_size) for chr in chr_sizes ])
+
+else:
+  print time.ctime(), "Reading bed file"
+  bed = pd.read_table(str(args.bed))#('NA12878.1kb.chr20.cnv_calls_summary.txt')
+
+print time.ctime(), "Calculating coverage"
+bed['coverage'] = calculate_coverage(bam,bed)
+
+print time.ctime(), "Reading reference genome fasta"
+ref = ''
+if args.ref != 0:
+  ref = pysam.Fastafile(args.ref) #/home/inti/RESOURCES/ref_genomes/hs37d5.fa.gz
+else:
+  ref = pysam.Fastafile(''.join((args.ref_folder,ref_filename)))
+
+print time.ctime(), "Calculating GC content for bins"
+bed['gc'] = calculate_gc_content(ref,bed)
+print time.ctime(), "Normalizing by GC content"
+bed['counts']=normalise_by_gc(bed)
+bed = bed[['chr','start','end','coverage','counts']]
+bed.to_csv(args.output,header=None,index=None,sep="\t")
+print time.ctime(),"Done"

File resources/gap.complement.bed

+1	10000	177417
+1	227417	267719
+1	317719	471368
+1	521368	2634220
+1	2684220	3845268
+1	3995268	13052998
+1	13102998	13219912
+1	13319912	13557162
+1	13607162	17125658
+1	17175658	29878082
+1	30028082	103863906
+1	103913906	120697156
+1	120747156	120936695
+1	121086695	121485434
+1	142535434	142731022
+1	142781022	142967761
+1	143117761	143292816
+1	143342816	143544525
+1	143644525	143771002
+1	143871002	144095783
+1	144145783	144224481
+1	144274481	144401744
+1	144451744	144622413
+1	144672413	144710724
+1	144810724	145833118
+1	145883118	146164650
+1	146214650	146253299
+1	146303299	148026038
+1	148176038	148361358
+1	148511358	148684147
+1	148734147	148954460
+1	149004460	149459645
+1	149509645	205922707
+1	206072707	206332221
+1	206482221	223747846
+1	223797846	235192211
+1	235242211	248908210
+1	249058210	249240621
+2	10000	3529312
+2	3579312	5018788
+2	5118788	16279724
+2	16329724	21153113
+2	21178113	87668206
+2	87718206	89630436
+2	89830436	90321525
+2	90371525	90545103
+2	91595103	92326171
+2	95326171	110109337
+2	110251337	149690582
+2	149790582	234003741
+2	234053741	239801978
+2	239831978	240784132
+2	240809132	243102476
+2	243152476	243189373
+3	60000	66170270
+3	66270270	90504854
+3	93504854	194041961
+3	194047251	197962430
+4	10000	1423146
+4	1478646	8799203
+4	8818203	9274642
+4	9324642	31820917
+4	31837417	32834638
+4	32840638	40296396
+4	40297096	49338941
+4	49488941	49660117
+4	52660117	59739333
+4	59789333	75427379
+4	75452279	191044276
+5	10000	17530657
+5	17580657	46405641
+5	49405641	91636128
+5	91686128	138787073
+5	138837073	155138727
+5	155188727	180905260
+6	60000	58087659
+6	58137659	58780166
+6	61880166	62128589
+6	62178589	95680543
+6	95830543	157559467
+6	157609467	157641300
+6	157691300	167942073
+6	168042073	170279972
+6	170329972	171055067
+7	10000	232484
+7	282484	50370631
+7	50410631	58054331
+7	61054331	61310513
+7	61360513	61460465
+7	61510465	61677020
+7	61727020	61917157
+7	61967157	74715724
+7	74765724	100556043
+7	100606043	130154523
+7	130254523	139379377
+7	139404377	142048195
+7	142098195	142276197
+7	142326197	143347897
+7	143397897	154270634
+7	154370634	159128663
+X	60000	94821
+X	144821	231384
+X	281384	1047557
+X	1097557	1134113
+X	1184113	1264234
+X	1314234	2068238
+X	2118238	7623882
+X	7673882	10738674
+X	10788674	37098256
+X	37148256	49242997
+X	49292997	49974173
+X	50024173	52395914
+X	52445914	58582012
+X	61682012	76653692
+X	76703692	113517668
+X	113567668	115682290
+X	115732290	120013235
+X	120063235	143507324
+X	143557324	148906424
+X	148956424	149032062
+X	149082062	152277099
+X	152327099	155260560
+8	10000	7474649
+8	7524649	12091854
+8	12141854	43838887
+8	46838887	48130499
+8	48135599	86576451
+8	86726451	142766515
+8	142816515	145332588
+8	145432588	146304022
+9	10000	39663686
+9	39713686	39974796
+9	40024796	40233029
+9	40283029	40425834
+9	40475834	40940341
+9	40990341	41143214
+9	41193214	41365793
+9	41415793	42613955
+9	42663955	43213698
+9	43313698	43946569
+9	43996569	44676646
+9	44726646	44908293
+9	44958293	45250203
+9	45350203	45815521
+9	45865521	46216430
+9	46266430	46461039
+9	46561039	47060133
+9	47160133	47317679
+9	65467679	65918360
+9	65968360	66192215
+9	66242215	66404656
+9	66454656	66614195
+9	66664195	66863343
+9	66913343	67107834
+9	67207834	67366296
+9	67516296	67987998
+9	68137998	68514181
+9	68664181	68838946
+9	68988946	69278385
+9	69328385	70010542
+9	70060542	70218729
+9	70318729	70506535
+9	70556535	70735468
+9	70835468	92343416
+9	92443416	92528796
+9	92678796	133073060
+9	133223060	137041193
+9	137091193	139166997
+9	139216997	141153431
+10	60000	17974675
+10	18024675	38818835
+10	38868835	39154935
+10	42354935	42546687
+10	42596687	46426964
+10	46476964	47429169
+10	47529169	47792476
+10	47892476	48055707
+10	48105707	49095536
+10	49195536	51137410
+10	51187410	51398845
+10	51448845	125869472
+10	125919472	128616069
+10	128766069	133381404
+10	133431404	133677527
+10	133727527	135524747
+11	60000	1162759
+11	1212759	50783853
+11	51090853	51594205
+11	54694205	69089801
+11	69139801	69724695
+11	69774695	87688378
+11	87738378	96287584
+11	96437584	134946516
+12	60000	95739
+12	145739	7189876
+12	7239876	34856694
+12	37856694	109373470
+12	109423470	122530623
+12	122580623	132706992
+12	132806992	133841895
+13	19020000	86760324
+13	86910324	112353994
+13	112503994	114325993
+13	114425993	114639948
+13	114739948	115109878
+14	19000000	107289540
+15	20000000	20894633
+15	20935075	21398819
+15	21885000	22212114
+15	22262114	22596193
+15	22646193	23514853
+15	23564853	29159443
+15	29209443	82829645
+15	82879645	84984473
+15	85034473	102521392
+16	60000	8636921
+16	8686921	34023150
+16	34173150	35285801
+16	46385801	88389383
+16	88439383	90294753
+17	0	296626
+17	396626	21566608
+17	21666608	22263006
+17	25263006	34675848
+17	34725848	62410760
+17	62460760	77546461
+17	77596461	79709049
+17	79759049	81195210
+18	10000	15410898
+18	18510898	52059136
+18	52209136	72283353
+18	72333353	75721820
+18	75771820	78017248
+20	60000	26319569
+20	29419569	29653908
+20	29803908	34897085
+20	34947085	61091437
+20	61141437	61213369
+20	61263369	62965520
+Y	10000	44821
+Y	94821	181384
+Y	231384	997557
+Y	1047557	1084113
+Y	1134113	1214234
+Y	1264234	2018238
+Y	2068238	8914955
+Y	8964955	9241322
+Y	9291322	10104553
+Y	13104553	13143954
+Y	13193954	13748578
+Y	13798578	20143885
+Y	20193885	22369679
+Y	22419679	23901428
+Y	23951428	28819361
+Y	58819361	58917656
+Y	58967656	59363566
+19	60000	7346004
+19	7396004	8687198
+19	8737198	20523415
+19	20573415	24631782
+19	27731782	59118983
+22	16050000	16697850
+22	16847850	20509431
+22	20609431	50364777
+22	50414777	51244566
+21	9411193	9595548
+21	9645548	9775437
+21	9825437	10034920
+21	10084920	10215976
+21	10365976	10647896
+21	10697896	11188129
+21	14338129	42955559
+21	43005559	44632664
+21	44682664	48119895
+6_ssto_hap7	0	861800
+6_ssto_hap7	941242	1727717
+6_ssto_hap7	1744408	1836632
+6_ssto_hap7	1838892	1983561
+6_ssto_hap7	1997824	2052735
+6_ssto_hap7	2087415	2101474
+6_ssto_hap7	2113875	2138491
+6_ssto_hap7	2303018	2411247
+6_ssto_hap7	2461193	3040184
+6_ssto_hap7	3054966	3093568
+6_ssto_hap7	3154699	3191196
+6_ssto_hap7	3214366	3368931
+6_ssto_hap7	3445987	4370564
+6_ssto_hap7	4420564	4580410
+6_ssto_hap7	4610218	4639806
+6_ssto_hap7	4661556	4723509
+6_ssto_hap7	4774367	4783798
+6_ssto_hap7	4836049	4928567
+6_mcf_hap5	0	135141
+6_mcf_hap5	138043	179429
+6_mcf_hap5	186757	229133
+6_mcf_hap5	282732	383988
+6_mcf_hap5	404415	491590
+6_mcf_hap5	560421	676599
+6_mcf_hap5	685588	994250
+6_mcf_hap5	1009296	1177759
+6_mcf_hap5	1292220	1562952
+6_mcf_hap5	1726699	1815769
+6_mcf_hap5	1838106	2091199
+6_mcf_hap5	2189158	2241824
+6_mcf_hap5	2251998	2300850
+6_mcf_hap5	2305647	2712451
+6_mcf_hap5	2811903	2947576
+6_mcf_hap5	2958999	3154498
+6_mcf_hap5	3188719	3329672
+6_mcf_hap5	3355524	3602941
+6_mcf_hap5	3659279	3899294
+6_mcf_hap5	3929849	4149579
+6_mcf_hap5	4149626	4284353
+6_mcf_hap5	4302274	4422172
+6_mcf_hap5	4594253	4833398
+6_cox_hap2	0	4795371
+6_mann_hap4	0	145852
+6_mann_hap4	184243	1388812
+6_mann_hap4	1438812	2047420
+6_mann_hap4	2056121	2252213
+6_mann_hap4	2277026	2312291
+6_mann_hap4	2324610	2517052
+6_mann_hap4	2540962	2772006
+6_mann_hap4	2810953	2921096
+6_mann_hap4	2929556	3062107
+6_mann_hap4	3151453	3174271
+6_mann_hap4	3227203	3261361
+6_mann_hap4	3374492	3412258
+6_mann_hap4	3450867	3553455
+6_mann_hap4	3557741	3895092
+6_mann_hap4	3926341	4236950
+6_mann_hap4	4245095	4441658
+6_mann_hap4	4480941	4683263
+6_apd_hap1	0	207807
+6_apd_hap1	252060	327074
+6_apd_hap1	356458	448574
+6_apd_hap1	536172	588761
+6_apd_hap1	692953	944435
+6_apd_hap1	963558	970040
+6_apd_hap1	1029417	1173934
+6_apd_hap1	1198528	1307951
+6_apd_hap1	1317697	1341208
+6_apd_hap1	1344139	1451581
+6_apd_hap1	1556172	1582549
+6_apd_hap1	1598989	1736596
+6_apd_hap1	1786349	1833811
+6_apd_hap1	1937682	2009922
+6_apd_hap1	2164480	2169002
+6_apd_hap1	2254169	2359974
+6_apd_hap1	2761234	2821679
+6_apd_hap1	2858431	2894657
+6_apd_hap1	3037482	3122973
+6_apd_hap1	3136480	3180323
+6_apd_hap1	3355295	3370179
+6_apd_hap1	3415446	3491066
+6_apd_hap1	3594101	3638667
+6_apd_hap1	3642320	3696623
+6_apd_hap1	3736194	3762870
+6_apd_hap1	3766192	3790366
+6_apd_hap1	3970602	4220467
+6_apd_hap1	4274176	4390029
+6_apd_hap1	4597885	4622290
+6_qbl_hap6	0	521900
+6_qbl_hap6	681210	2680914
+6_qbl_hap6	2732064	3023176
+6_qbl_hap6	3049607	3316246
+6_qbl_hap6	3369039	3993031
+6_qbl_hap6	4020006	4611984
+6_dbb_hap3	0	138055
+6_dbb_hap3	245960	622457
+6_dbb_hap3	640380	1337861
+6_dbb_hap3	1344119	1836690
+6_dbb_hap3	1837200	2117295
+6_dbb_hap3	2119813	2606257
+6_dbb_hap3	2723615	3407000
+6_dbb_hap3	3481304	3736386
+6_dbb_hap3	3772614	4206542
+6_dbb_hap3	4226964	4354126
+6_dbb_hap3	4376794	4610396
+17_ctg5_hap1	0	1256794
+17_ctg5_hap1	1306794	1588968
+17_ctg5_hap1	1638968	1680828
+4_ctg9_hap1	0	590426
+1_gl000192_random	0	547496
+Un_gl000225	0	211173
+4_gl000194_random	0	191469
+4_gl000193_random	0	189789
+9_gl000200_random	0	187035
+Un_gl000222	0	186861
+Un_gl000212	0	186858
+7_gl000195_random	0	182896
+Un_gl000223	0	180455
+Un_gl000224	0	179693
+Un_gl000219	0	179198
+17_gl000205_random	0	174588
+Un_gl000215	0	172545
+Un_gl000216	0	172294
+Un_gl000217	0	172149
+9_gl000199_random	0	169874
+Un_gl000211	0	166566
+Un_gl000213	0	164239
+Un_gl000220	0	161802
+Un_gl000218	0	161147
+19_gl000209_random	0	159169
+Un_gl000221	0	155397
+Un_gl000214	0	137718
+Un_gl000228	0	129120
+Un_gl000227	0	128374
+1_gl000191_random	0	106433
+19_gl000208_random	0	92689
+9_gl000198_random	0	90085
+17_gl000204_random	0	81310
+Un_gl000233	0	45941
+Un_gl000237	0	45867
+Un_gl000230	0	43691
+Un_gl000242	0	43523
+Un_gl000243	0	43341
+Un_gl000241	0	42152
+Un_gl000236	0	41934
+Un_gl000240	0	41933
+17_gl000206_random	0	41001
+Un_gl000232	0	40652
+Un_gl000234	0	40531
+11_gl000202_random	0	40103
+Un_gl000238	0	39939
+Un_gl000244	0	39929
+Un_gl000248	0	39786
+8_gl000196_random	0	38914
+Un_gl000249	0	38502
+Un_gl000246	0	38154
+17_gl000203_random	0	37498
+8_gl000197_random	0	37175
+Un_gl000245	0	36651
+Un_gl000247	0	36422
+9_gl000201_random	0	36148
+Un_gl000235	0	34474
+Un_gl000239	0	33824
+21_gl000210_random	0	27682
+Un_gl000231	0	27386
+Un_gl000229	0	19913
+M	0	16571
+Un_gl000226	0	15008
+18_gl000207_random	0	4262

File resources/gap_withchr.bed

+chr1	0	10000	1	N	10000	telomere	no
+chr1	177417	227417	4	N	50000	clone	yes
+chr1	267719	317719	6	N	50000	contig	no
+chr1	471368	521368	8	N	50000	contig	no
+chr1	2634220	2684220	32	N	50000	clone	yes
+chr1	3845268	3995268	47	N	150000	contig	no
+chr1	13052998	13102998	151	N	50000	clone	yes
+chr1	13219912	13319912	154	N	100000	contig	no
+chr1	13557162	13607162	157	N	50000	clone	yes
+chr1	17125658	17175658	196	N	50000	clone	yes
+chr1	29878082	30028082	337	N	150000	contig	no
+chr1	103863906	103913906	1088	N	50000	clone	yes
+chr1	120697156	120747156	1263	N	50000	clone	yes
+chr1	120936695	121086695	1265	N	150000	contig	no
+chr1	121485434	121535434	1269	N	50000	clone	no
+chr1	121535434	124535434	1270	N	3000000	centromere	no
+chr1	124535434	142535434	1271	N	18000000	heterochromatin	no
+chr1	142731022	142781022	1273	N	50000	clone	yes
+chr1	142967761	143117761	1275	N	150000	contig	no
+chr1	143292816	143342816	1277	N	50000	clone	yes
+chr1	143544525	143644525	1279	N	100000	contig	no
+chr1	143771002	143871002	1281	N	100000	contig	no
+chr1	144095783	144145783	1285	N	50000	contig	no
+chr1	144224481	144274481	1287	N	50000	contig	no
+chr1	144401744	144451744	1289	N	50000	clone	yes
+chr1	144622413	144672413	1291	N	50000	contig	no
+chr1	144710724	144810724	1293	N	100000	clone	yes
+chr1	145833118	145883118	1307	N	50000	clone	yes
+chr1	146164650	146214650	1310	N	50000	clone	yes
+chr1	146253299	146303299	1312	N	50000	clone	yes
+chr1	148026038	148176038	1328	N	150000	contig	no
+chr1	148361358	148511358	1331	N	150000	contig	no
+chr1	148684147	148734147	1333	N	50000	clone	yes
+chr1	148954460	149004460	1337	N	50000	clone	yes
+chr1	149459645	149509645	1342	N	50000	clone	yes
+chr1	205922707	206072707	1875	N	150000	contig	no
+chr1	206332221	206482221	1878	N	150000	contig	no
+chr1	223747846	223797846	2038	N	50000	clone	yes
+chr1	235192211	235242211	2159	N	50000	clone	yes
+chr1	248908210	249058210	2293	N	150000	contig	no
+chr1	249240621	249250621	2302	N	10000	telomere	no
+chr10	0	10000	1	N	10000	telomere	no
+chr10	10000	60000	2	N	50000	contig	no
+chr10	17974675	18024675	161	N	50000	clone	yes
+chr10	38818835	38868835	337	N	50000	clone	yes
+chr10	39154935	39254935	340	N	100000	contig	no
+chr10	39254935	42254935	341	N	3000000	centromere	no
+chr10	42254935	42354935	342	N	100000	contig	no
+chr10	42546687	42596687	344	N	50000	clone	yes
+chr10	46426964	46476964	375	N	50000	contig	no
+chr10	47429169	47529169	387	N	100000	contig	no
+chr10	47792476	47892476	390	N	100000	contig	no
+chr10	48055707	48105707	392	N	50000	contig	no
+chr10	49095536	49195536	403	N	100000	contig	no
+chr10	51137410	51187410	419	N	50000	clone	yes
+chr10	51398845	51448845	421	N	50000	clone	yes
+chr10	125869472	125919472	1061	N	50000	clone	yes
+chr10	128616069	128766069	1088	N	150000	contig	no
+chr10	133381404	133431404	1129	N	50000	clone	yes
+chr10	133677527	133727527	1135	N	50000	clone	yes
+chr10	135524747	135534747	1156	N	10000	telomere	no
+chr11	0	10000	1	N	10000	telomere	no
+chr11	10000	60000	2	N	50000	contig	no
+chr11	1162759	1212759	14	N	50000	clone	yes
+chr11	50783853	50833853	439	N	50000	clone	no
+chr11	50833853	51040853	440	N	207000	heterochromatin	no
+chr11	51040853	51090853	441	N	50000	clone	no
+chr11	51594205	51644205	446	N	50000	clone	no
+chr11	51644205	54644205	447	N	3000000	centromere	no
+chr11	54644205	54694205	448	N	50000	clone	no
+chr11	69089801	69139801	575	N	50000	clone	yes
+chr11	69724695	69774695	583	N	50000	clone	yes
+chr11	87688378	87738378	736	N	50000	clone	yes
+chr11	96287584	96437584	808	N	150000	contig	no
+chr11	134946516	134996516	1134	N	50000	contig	no
+chr11	134996516	135006516	1135	N	10000	telomere	no
+chr12	0	10000	1	N	10000	telomere	no
+chr12	10000	60000	2	N	50000	contig	no
+chr12	95739	145739	4	N	50000	clone	yes
+chr12	7189876	7239876	66	N	50000	contig	no
+chr12	34856694	37856694	304	N	3000000	centromere	no
+chr12	109373470	109423470	954	N	50000	contig	no
+chr12	122530623	122580623	1075	N	50000	contig	no
+chr12	132706992	132806992	1171	N	100000	contig	no
+chr12	133841895	133851895	1183	N	10000	telomere	no
+chr13	0	10000	1	N	10000	telomere	no
+chr13	10000	16000000	2	N	15990000	short_arm	no
+chr13	16000000	19000000	3	N	3000000	centromere	no
+chr13	19000000	19020000	4	N	20000	heterochromatin	no
+chr13	86760324	86910324	617	N	150000	contig	no
+chr13	112353994	112503994	848	N	150000	contig	no
+chr13	114325993	114425993	866	N	100000	contig	no
+chr13	114639948	114739948	870	N	100000	contig	no
+chr13	115109878	115159878	875	N	50000	contig	no
+chr13	115159878	115169878	876	N	10000	telomere	no
+chr14	0	10000	1	N	10000	telomere	no
+chr14	10000	16000000	2	N	15990000	short_arm	no
+chr14	16000000	19000000	3	N	3000000	centromere	no
+chr14	107289540	107339540	667	N	50000	contig	no
+chr14	107339540	107349540	668	N	10000	telomere	no
+chr15	0	10000	1	N	10000	telomere	no
+chr15	10000	17000000	2	N	16990000	short_arm	no
+chr15	17000000	20000000	3	N	3000000	centromere	no
+chr15	20894633	20935075	12	N	40442	clone	yes
+chr15	21398819	21885000	16	N	486181	clone	yes
+chr15	22212114	22262114	19	N	50000	contig	no
+chr15	22596193	22646193	23	N	50000	contig	no
+chr15	23514853	23564853	31	N	50000	contig	no
+chr15	29159443	29209443	83	N	50000	contig	no
+chr15	82829645	82879645	518	N	50000	contig	no
+chr15	84984473	85034473	539	N	50000	contig	no
+chr15	102521392	102531392	690	N	10000	telomere	no
+chr16	0	10000	1	N	10000	telomere	no
+chr16	10000	60000	2	N	50000	contig	no
+chr16	8636921	8686921	128	N	50000	clone	yes
+chr16	34023150	34173150	343	N	150000	contig	no
+chr16	35285801	35335801	353	N	50000	clone	no
+chr16	35335801	38335801	354	N	3000000	centromere	no
+chr16	38335801	46335801	355	N	8000000	heterochromatin	no
+chr16	46335801	46385801	356	N	50000	clone	no
+chr16	88389383	88439383	712	N	50000	contig	no
+chr16	90294753	90344753	731	N	50000	contig	no
+chr16	90344753	90354753	732	N	10000	telomere	no
+chr17	296626	396626	4	N	100000	contig	no
+chr17	21566608	21666608	185	N	100000	contig	no
+chr17	22263006	25263006	192	N	3000000	centromere	no
+chr17	34675848	34725848	278	N	50000	contig	no
+chr17	62410760	62460760	523	N	50000	clone	yes
+chr17	77546461	77596461	644	N	50000	clone	yes
+chr17	79709049	79759049	665	N	50000	contig	no
+chr17_ctg5_hap1	1256794	1306794	12	N	50000	clone	yes
+chr17_ctg5_hap1	1588968	1638968	15	N	50000	clone	yes
+chr18	0	10000	1	N	10000	telomere	no
+chr18	15410898	15460898	124	N	50000	clone	no
+chr18	15460898	18460898	125	N	3000000	centromere	no
+chr18	18460898	18510898	126	N	50000	clone	no
+chr18	52059136	52209136	398	N	150000	contig	no
+chr18	72283353	72333353	555	N	50000	clone	yes
+chr18	75721820	75771820	585	N	50000	clone	yes
+chr18	78017248	78067248	604	N	50000	contig	no
+chr18	78067248	78077248	605	N	10000	telomere	no
+chr19	0	10000	1	N	10000	telomere	no
+chr19	10000	60000	2	N	50000	contig	no
+chr19	7346004	7396004	170	N	50000	contig	no
+chr19	8687198	8737198	190	N	50000	contig	no
+chr19	20523415	20573415	367	N	50000	clone	yes
+chr19	24631782	24681782	409	N	50000	heterochromatin	no
+chr19	24681782	27681782	410	N	3000000	centromere	no
+chr19	27681782	27731782	411	N	50000	heterochromatin	no
+chr19	59118983	59128983	870	N	10000	telomere	no
+chr2	0	10000	1	N	10000	telomere	no
+chr2	3529312	3579312	38	N	50000	contig	no
+chr2	5018788	5118788	52	N	100000	contig	no
+chr2	16279724	16329724	149	N	50000	contig	no
+chr2	21153113	21178113	192	N	25000	contig	no
+chr2	87668206	87718206	739	N	50000	clone	yes
+chr2	89630436	89830436	755	N	200000	contig	no
+chr2	90321525	90371525	760	N	50000	clone	yes
+chr2	90545103	91545103	762	N	1000000	heterochromatin	no
+chr2	91545103	91595103	763	N	50000	clone	no
+chr2	92326171	95326171	770	N	3000000	centromere	no
+chr2	110109337	110251337	893	N	142000	contig	no
+chr2	149690582	149790582	1221	N	100000	contig	no
+chr2	234003741	234053741	1942	N	50000	contig	no
+chr2	239801978	239831978	1992	N	30000	contig	no
+chr2	240784132	240809132	2001	N	25000	contig	no
+chr2	243102476	243152476	2025	N	50000	clone	yes
+chr2	243189373	243199373	2027	N	10000	telomere	no
+chr20	0	10000	1	N	10000	telomere	no
+chr20	10000	60000	2	N	50000	contig	no
+chr20	26319569	26369569	274	N	50000	clone	no
+chr20	26369569	29369569	275	N	3000000	centromere	no
+chr20	29369569	29419569	276	N	50000	clone	no
+chr20	29653908	29803908	279	N	150000	contig	no
+chr20	34897085	34947085	336	N	50000	clone	yes
+chr20	61091437	61141437	624	N	50000	clone	yes
+chr20	61213369	61263369	628	N	50000	contig	no
+chr20	62965520	63015520	648	N	50000	contig	no
+chr20	63015520	63025520	649	N	10000	telomere	no
+chr21	0	10000	1	N	10000	telomere	no
+chr21	10000	5211193	2	N	5201193	short_arm	no
+chr21	5211193	9411193	3	N	4200000	contig	no
+chr21	9595548	9645548	5	N	50000	contig	no
+chr21	9775437	9825437	7	N	50000	contig	no
+chr21	10034920	10084920	10	N	50000	contig	no
+chr21	10215976	10365976	12	N	150000	contig	no
+chr21	10647896	10697896	15	N	50000	contig	no
+chr21	11188129	11238129	20	N	50000	clone	no
+chr21	11238129	11288129	21	N	50000	heterochromatin	no
+chr21	11288129	14288129	22	N	3000000	centromere	no
+chr21	14288129	14338129	23	N	50000	clone	no
+chr21	42955559	43005559	419	N	50000	contig	no
+chr21	44632664	44682664	447	N	50000	clone	yes
+chr21	48119895	48129895	515	N	10000	telomere	no
+chr22	0	10000	1	N	10000	telomere	no
+chr22	10000	13000000	2	N	12990000	short_arm	no
+chr22	13000000	16000000	3	N	3000000	centromere	no
+chr22	16000000	16050000	4	N	50000	contig	no
+chr22	16697850	16847850	27	N	150000	contig	no
+chr22	20509431	20609431	81	N	100000	contig	no
+chr22	50364777	50414777	563	N	50000	contig	no
+chr22	51244566	51294566	591	N	50000	contig	no
+chr22	51294566	51304566	592	N	10000	telomere	no
+chr3	0	10000	1	N	10000	telomere	no
+chr3	10000	60000	2	N	50000	clone	no
+chr3	66170270	66270270	565	N	100000	contig	no
+chr3	90504854	93504854	784	N	3000000	centromere	no
+chr3	194041961	194047251	1693	N	5290	contig	no
+chr3	197962430	198012430	1732	N	50000	contig	no
+chr3	198012430	198022430	1733	N	10000	telomere	no
+chr4	0	10000	1	N	10000	telomere	no
+chr4	1423146	1478646	15	N	55500	contig	no
+chr4	8799203	8818203	83	N	19000	contig	no
+chr4	9274642	9324642	88	N	50000	clone	yes
+chr4	31820917	31837417	283	N	16500	contig	no
+chr4	32834638	32840638	294	N	6000	contig	no
+chr4	40296396	40297096	365	N	700	contig	no
+chr4	49338941	49488941	445	N	150000	contig	no
+chr4	49660117	52660117	447	N	3000000	centromere	no
+chr4	59739333	59789333	510	N	50000	contig	no
+chr4	75427379	75452279	651	N	24900	contig	no
+chr4	191044276	191144276	1657	N	100000	clone	no
+chr4	191144276	191154276	1658	N	10000	telomere	no
+chr5	0	10000	1	N	10000	telomere	no
+chr5	17530657	17580657	171	N	50000	clone	yes
+chr5	46405641	49405641	452	N	3000000	centromere	no
+chr5	91636128	91686128	873	N	50000	contig	no
+chr5	138787073	138837073	1350	N	50000	contig	no
+chr5	155138727	155188727	1514	N	50000	contig	no
+chr5	180905260	180915260	1775	N	10000	telomere	no
+chr6	0	10000	1	N	10000	telomere	no
+chr6	10000	60000	2	N	50000	contig	no
+chr6	58087659	58137659	620	N	50000	clone	yes
+chr6	58780166	58830166	627	N	50000	clone	no
+chr6	58830166	61830166	628	N	3000000	centromere	no
+chr6	61830166	61880166	629	N	50000	clone	no
+chr6	62128589	62178589	633	N	50000	clone	yes
+chr6	95680543	95830543	992	N	150000	contig	no
+chr6	157559467	157609467	1652	N	50000	clone	yes
+chr6	157641300	157691300	1654	N	50000	clone	yes
+chr6	167942073	168042073	1761	N	100000	clone	yes
+chr6	170279972	170329972	1787	N	50000	clone	yes
+chr6	171055067	171105067	1797	N	50000	contig	no
+chr6	171105067	171115067	1798	N	10000	telomere	no
+chr6_apd_hap1	207807	252060	4	N	44253	clone	yes
+chr6_apd_hap1	327074	356458	6	N	29384	clone	yes
+chr6_apd_hap1	448574	536172	9	N	87598	clone	yes
+chr6_apd_hap1	588761	692953	11	N	104192	clone	yes
+chr6_apd_hap1	944435	963558	16	N	19123	clone	yes
+chr6_apd_hap1	970040	1029417	18	N	59377	clone	yes
+chr6_apd_hap1	1173934	1198528	23	N	24594	clone	yes
+chr6_apd_hap1	1307951	1317697	26	N	9746	clone	yes
+chr6_apd_hap1	1341208	1344139	28	N	2931	clone	yes
+chr6_apd_hap1	1451581	1556172	31	N	104591	clone	yes
+chr6_apd_hap1	1582549	1598989	33	N	16440	clone	yes
+chr6_apd_hap1	1736596	1786349	37	N	49753	clone	yes
+chr6_apd_hap1	1833811	1937682	39	N	103871	clone	yes
+chr6_apd_hap1	2009922	2164480	41	N	154558	clone	yes
+chr6_apd_hap1	2169002	2254169	43	N	85167	clone	yes
+chr6_apd_hap1	2359974	2761234	48	N	401260	clone	yes
+chr6_apd_hap1	2821679	2858431	51	N	36752	clone	yes
+chr6_apd_hap1	2894657	3037482	54	N	142825	clone	yes
+chr6_apd_hap1	3122973	3136480	56	N	13507	clone	yes
+chr6_apd_hap1	3180323	3355295	59	N	174972	clone	yes
+chr6_apd_hap1	3370179	3415446	62	N	45267	clone	yes
+chr6_apd_hap1	3491066	3594101	64	N	103035	clone	yes
+chr6_apd_hap1	3638667	3642320	66	N	3653	clone	yes
+chr6_apd_hap1	3696623	3736194	68	N	39571	clone	yes
+chr6_apd_hap1	3762870	3766192	70	N	3322	clone	yes
+chr6_apd_hap1	3790366	3970602	72	N	180236	clone	yes
+chr6_apd_hap1	4220467	4274176	76	N	53709	clone	yes
+chr6_apd_hap1	4390029	4597885	79	N	207856	clone	yes
+chr6_dbb_hap3	138055	245960	4	N	107905	clone	yes
+chr6_dbb_hap3	622457	640380	11	N	17923	clone	yes
+chr6_dbb_hap3	1337861	1344119	21	N	6258	clone	yes
+chr6_dbb_hap3	1836690	1837200	29	N	510	clone	yes
+chr6_dbb_hap3	2117295	2119813	33	N	2518	clone	yes
+chr6_dbb_hap3	2606257	2723615	42	N	117358	clone	yes
+chr6_dbb_hap3	3407000	3481304	52	N	74304	clone	yes
+chr6_dbb_hap3	3736386	3772614	59	N	36228	clone	yes
+chr6_dbb_hap3	4206542	4226964	66	N	20422	clone	yes
+chr6_dbb_hap3	4354126	4376794	69	N	22668	clone	yes
+chr6_mann_hap4	145852	184243	3	N	38391	clone	yes
+chr6_mann_hap4	1388812	1438812	19	N	50000	clone	yes
+chr6_mann_hap4	2047420	2056121	28	N	8701	clone	yes
+chr6_mann_hap4	2252213	2277026	32	N	24813	clone	yes
+chr6_mann_hap4	2312291	2324610	34	N	12319	clone	yes
+chr6_mann_hap4	2517052	2540962	37	N	23910	clone	yes
+chr6_mann_hap4	2772006	2810953	42	N	38947	clone	yes
+chr6_mann_hap4	2921096	2929556	45	N	8460	clone	yes
+chr6_mann_hap4	3062107	3151453	47	N	89346	clone	yes
+chr6_mann_hap4	3174271	3227203	49	N	52932	clone	yes
+chr6_mann_hap4	3261361	3374492	52	N	113131	clone	yes
+chr6_mann_hap4	3412258	3450867	54	N	38609	clone	yes
+chr6_mann_hap4	3553455	3557741	56	N	4286	clone	yes
+chr6_mann_hap4	3895092	3926341	62	N	31249	clone	yes
+chr6_mann_hap4	4236950	4245095	68	N	8145	clone	yes
+chr6_mann_hap4	4441658	4480941	73	N	39283	clone	yes
+chr6_mcf_hap5	135141	138043	3	N	2902	clone	yes
+chr6_mcf_hap5	179429	186757	5	N	7328	clone	yes
+chr6_mcf_hap5	229133	282732	7	N	53599	clone	yes
+chr6_mcf_hap5	383988	404415	10	N	20427	clone	yes
+chr6_mcf_hap5	491590	560421	12	N	68831	clone	yes
+chr6_mcf_hap5	676599	685588	15	N	8989	clone	yes
+chr6_mcf_hap5	994250	1009296	20	N	15046	clone	yes
+chr6_mcf_hap5	1177759	1292220	23	N	114461	clone	yes
+chr6_mcf_hap5	1562952	1726699	28	N	163747	clone	yes
+chr6_mcf_hap5	1815769	1838106	30	N	22337	clone	yes
+chr6_mcf_hap5	2091199	2189158	37	N	97959	clone	yes
+chr6_mcf_hap5	2241824	2251998	39	N	10174	clone	yes
+chr6_mcf_hap5	2300850	2305647	41	N	4797	clone	yes
+chr6_mcf_hap5	2712451	2811903	49	N	99452	clone	yes
+chr6_mcf_hap5	2947576	2958999	52	N	11423	clone	yes
+chr6_mcf_hap5	3154498	3188719	55	N	34221	clone	yes
+chr6_mcf_hap5	3329672	3355524	58	N	25852	clone	yes
+chr6_mcf_hap5	3602941	3659279	65	N	56338	clone	yes
+chr6_mcf_hap5	3899294	3929849	69	N	30555	clone	yes
+chr6_mcf_hap5	4149579	4149626	73	N	47	clone	yes
+chr6_mcf_hap5	4284353	4302274	76	N	17921	clone	yes
+chr6_mcf_hap5	4422172	4594253	78	N	172081	clone	yes
+chr6_qbl_hap6	521900	681210	9	N	159310	clone	yes
+chr6_qbl_hap6	2680914	2732064	37	N	51150	clone	yes
+chr6_qbl_hap6	3023176	3049607	42	N	26431	clone	yes
+chr6_qbl_hap6	3316246	3369039	46	N	52793	clone	yes
+chr6_qbl_hap6	3993031	4020006	57	N	26975	clone	yes
+chr6_ssto_hap7	861800	941242	15	N	79442	clone	yes
+chr6_ssto_hap7	1727717	1744408	28	N	16691	clone	yes
+chr6_ssto_hap7	1836632	1838892	34	N	2260	clone	yes
+chr6_ssto_hap7	1983561	1997824	37	N	14263	clone	yes
+chr6_ssto_hap7	2052735	2087415	39	N	34680	clone	yes
+chr6_ssto_hap7	2101474	2113875	41	N	12401	clone	yes
+chr6_ssto_hap7	2138491	2303018	43	N	164527	clone	yes
+chr6_ssto_hap7	2411247	2461193	48	N	49946	clone	yes
+chr6_ssto_hap7	3040184	3054966	60	N	14782	clone	yes
+chr6_ssto_hap7	3093568	3154699	62	N	61131	clone	yes
+chr6_ssto_hap7	3191196	3214366	64	N	23170	clone	yes
+chr6_ssto_hap7	3368931	3445987	68	N	77056	clone	yes
+chr6_ssto_hap7	4370564	4420564	83	N	50000	clone	yes
+chr6_ssto_hap7	4580410	4610218	86	N	29808	clone	yes
+chr6_ssto_hap7	4639806	4661556	88	N	21750	clone	yes
+chr6_ssto_hap7	4723509	4774367	90	N	50858	clone	yes
+chr6_ssto_hap7	4783798	4836049	92	N	52251	clone	yes
+chr7	0	10000	1	N	10000	telomere	no
+chr7	232484	282484	5	N	50000	clone	yes
+chr7	50370631	50410631	488	N	40000	contig	no
+chr7	58054331	61054331	564	N	3000000	centromere	no
+chr7	61310513	61360513	569	N	50000	heterochromatin	no
+chr7	61460465	61510465	571	N	50000	clone	yes
+chr7	61677020	61727020	573	N	50000	clone	yes
+chr7	61917157	61967157	575	N	50000	heterochromatin	no
+chr7	74715724	74765724	693	N	50000	clone	yes
+chr7	100556043	100606043	952	N	50000	clone	yes
+chr7	130154523	130254523	1278	N	100000	clone	yes
+chr7	139379377	139404377	1368	N	25000	contig	no
+chr7	142048195	142098195	1397	N	50000	clone	yes
+chr7	142276197	142326197	1399	N	50000	clone	yes
+chr7	143347897	143397897	1409	N	50000	clone	yes
+chr7	154270634	154370634	1517	N	100000	contig	no
+chr7	159128663	159138663	1563	N	10000	telomere	no
+chr8	0	10000	1	N	10000	telomere	no
+chr8	7474649	7524649	66	N	50000	contig	no
+chr8	12091854	12141854	105	N	50000	contig	no
+chr8	43838887	46838887	376	N	3000000	centromere	no
+chr8	48130499	48135599	389	N	5100	contig	no
+chr8	86576451	86726451	716	N	150000	contig	no
+chr8	142766515	142816515	1177	N	50000	clone	yes
+chr8	145332588	145432588	1209	N	100000	contig	no
+chr8	146304022	146354022	1216	N	50000	contig	no
+chr8	146354022	146364022	1217	N	10000	telomere	no
+chr9	0	10000	1	N	10000	telomere	no
+chr9	39663686	39713686	343	N	50000	clone	yes
+chr9	39974796	40024796	346	N	50000	contig	no
+chr9	40233029	40283029	348	N	50000	clone	yes
+chr9	40425834	40475834	350	N	50000	contig	no
+chr9	40940341	40990341	354	N	50000	contig	no
+chr9	41143214	41193214	357	N	50000	clone	yes
+chr9	41365793	41415793	359	N	50000	contig	no
+chr9	42613955	42663955	370	N	50000	contig	no
+chr9	43213698	43313698	375	N	100000	contig	no
+chr9	43946569	43996569	381	N	50000	clone	yes
+chr9	44676646	44726646	386	N	50000	clone	yes
+chr9	44908293	44958293	388	N	50000	clone	yes
+chr9	45250203	45350203	391	N	100000	contig	no
+chr9	45815521	45865521	397	N	50000	contig	no
+chr9	46216430	46266430	400	N	50000	clone	yes
+chr9	46461039	46561039	403	N	100000	contig	no
+chr9	47060133	47160133	408	N	100000	contig	no
+chr9	47317679	47367679	410	N	50000	clone	no
+chr9	47367679	50367679	411	N	3000000	centromere	no
+chr9	50367679	65367679	412	N	15000000	heterochromatin	no
+chr9	65367679	65467679	413	N	100000	contig	no
+chr9	65918360	65968360	418	N	50000	contig	no
+chr9	66192215	66242215	421	N	50000	clone	yes
+chr9	66404656	66454656	423	N	50000	clone	yes
+chr9	66614195	66664195	425	N	50000	clone	yes
+chr9	66863343	66913343	428	N	50000	clone	yes
+chr9	67107834	67207834	430	N	100000	contig	no
+chr9	67366296	67516296	432	N	150000	contig	no
+chr9	67987998	68137998	437	N	150000	contig	no
+chr9	68514181	68664181	441	N	150000	contig	no
+chr9	68838946	68988946	443	N	150000	contig	no
+chr9	69278385	69328385	446	N	50000	clone	yes
+chr9	70010542	70060542	452	N	50000	clone	yes
+chr9	70218729	70318729	454	N	100000	contig	no
+chr9	70506535	70556535	456	N	50000	contig	no
+chr9	70735468	70835468	458	N	100000	contig	no
+chr9	92343416	92443416	641	N	100000	clone	yes
+chr9	92528796	92678796	643	N	150000	clone	yes
+chr9	133073060	133223060	982	N	150000	contig	no
+chr9	137041193	137091193	1020	N	50000	contig	no
+chr9	139166997	139216997	1040	N	50000	contig	no
+chr9	141153431	141203431	1057	N	50000	contig	no
+chr9	141203431	141213431	1058	N	10000	telomere	no
+chrX	0	10000	1	N	10000	telomere	no
+chrX	10000	60000	2	N	50000	contig	no
+chrX	94821	144821	4	N	50000	contig	no
+chrX	231384	281384	8	N	50000	contig	no
+chrX	1047557	1097557	16	N	50000	contig	no
+chrX	1134113	1184113	18	N	50000	contig	no
+chrX	1264234	1314234	21	N	50000	contig	no
+chrX	2068238	2118238	31	N	50000	contig	no
+chrX	7623882	7673882	82	N	50000	clone	yes
+chrX	10738674	10788674	111	N	50000	clone	yes
+chrX	37098256	37148256	355	N	50000	contig	no
+chrX	49242997	49292997	484	N	50000	contig	no
+chrX	49974173	50024173	494	N	50000	contig	no
+chrX	52395914	52445914	518	N	50000	contig	no
+chrX	58582012	58632012	582	N	50000	clone	no
+chrX	58632012	61632012	583	N	3000000	centromere	no
+chrX	61632012	61682012	584	N	50000	clone	no
+chrX	76653692	76703692	724	N	50000	contig	no
+chrX	113517668	113567668	1183	N	50000	contig	no
+chrX	115682290	115732290	1214	N	50000	contig	no
+chrX	120013235	120063235	1268	N	50000	clone	yes
+chrX	143507324	143557324	1536	N	50000	contig	no
+chrX	148906424	148956424	1591	N	50000	clone	yes
+chrX	149032062	149082062	1594	N	50000	contig	no
+chrX	152277099	152327099	1624	N	50000	clone	yes
+chrX	155260560	155270560	1668	N	10000	telomere	no
+chrY	0	10000	1	N	10000	telomere	no
+chrY	44821	94821	3	N	50000	contig	no
+chrY	181384	231384	7	N	50000	contig	no
+chrY	997557	1047557	15	N	50000	contig	no
+chrY	1084113	1134113	17	N	50000	contig	no
+chrY	1214234	1264234	20	N	50000	contig	no
+chrY	2018238	2068238	30	N	50000	contig	no
+chrY	8914955	8964955	92	N	50000	contig	no
+chrY	9241322	9291322	97	N	50000	contig	no
+chrY	10104553	13104553	105	N	3000000	centromere	no
+chrY	13143954	13193954	107	N	50000	contig	no
+chrY	13748578	13798578	112	N	50000	contig	no
+chrY	20143885	20193885	169	N	50000	clone	yes
+chrY	22369679	22419679	190	N	50000	clone	yes
+chrY	23901428	23951428	204	N	50000	contig	no
+chrY	28819361	58819361	249	N	30000000	heterochromatin	no
+chrY	58917656	58967656	251	N	50000	contig	no
+chrY	59363566	59373566	255	N	10000	telomere	no

File resources/gap_withchr.complement.bed

+chr1	10000	177417
+chr1	227417	267719
+chr1	317719	471368
+chr1	521368	2634220
+chr1	2684220	3845268
+chr1	3995268	13052998
+chr1	13102998	13219912
+chr1	13319912	13557162
+chr1	13607162	17125658
+chr1	17175658	29878082
+chr1	30028082	103863906
+chr1	103913906	120697156
+chr1	120747156	120936695
+chr1	121086695	121485434
+chr1	142535434	142731022
+chr1	142781022	142967761
+chr1	143117761	143292816
+chr1	143342816	143544525
+chr1	143644525	143771002
+chr1	143871002	144095783
+chr1	144145783	144224481
+chr1	144274481	144401744
+chr1	144451744	144622413
+chr1	144672413	144710724
+chr1	144810724	145833118
+chr1	145883118	146164650
+chr1	146214650	146253299
+chr1	146303299	148026038
+chr1	148176038	148361358
+chr1	148511358	148684147
+chr1	148734147	148954460
+chr1	149004460	149459645
+chr1	149509645	205922707
+chr1	206072707	206332221
+chr1	206482221	223747846
+chr1	223797846	235192211
+chr1	235242211	248908210
+chr1	249058210	249240621
+chr2	10000	3529312
+chr2	3579312	5018788
+chr2	5118788	16279724
+chr2	16329724	21153113
+chr2	21178113	87668206
+chr2	87718206	89630436
+chr2	89830436	90321525
+chr2	90371525	90545103
+chr2	91595103	92326171
+chr2	95326171	110109337
+chr2	110251337	149690582
+chr2	149790582	234003741
+chr2	234053741	239801978
+chr2	239831978	240784132
+chr2	240809132	243102476
+chr2	243152476	243189373
+chr3	60000	66170270
+chr3	66270270	90504854
+chr3	93504854	194041961
+chr3	194047251	197962430
+chr4	10000	1423146
+chr4	1478646	8799203
+chr4	8818203	9274642
+chr4	9324642	31820917
+chr4	31837417	32834638
+chr4	32840638	40296396
+chr4	40297096	49338941
+chr4	49488941	49660117
+chr4	52660117	59739333
+chr4	59789333	75427379
+chr4	75452279	191044276
+chr5	10000	17530657
+chr5	17580657	46405641
+chr5	49405641	91636128
+chr5	91686128	138787073
+chr5	138837073	155138727
+chr5	155188727	180905260
+chr6	60000	58087659
+chr6	58137659	58780166
+chr6	61880166	62128589
+chr6	62178589	95680543
+chr6	95830543	157559467
+chr6	157609467	157641300
+chr6	157691300	167942073
+chr6	168042073	170279972
+chr6	170329972	171055067
+chr7	10000	232484
+chr7	282484	50370631
+chr7	50410631	58054331
+chr7	61054331	61310513
+chr7	61360513	61460465
+chr7	61510465	61677020
+chr7	61727020	61917157
+chr7	61967157	74715724
+chr7	74765724	100556043
+chr7	100606043	130154523
+chr7	130254523	139379377
+chr7	139404377	142048195
+chr7	142098195	142276197
+chr7	142326197	143347897
+chr7	143397897	154270634
+chr7	154370634	159128663
+chrX	60000	94821
+chrX	144821	231384
+chrX	281384	1047557
+chrX	1097557	1134113
+chrX	1184113	1264234
+chrX	1314234	2068238
+chrX	2118238	7623882
+chrX	7673882	10738674
+chrX	10788674	37098256
+chrX	37148256	49242997
+chrX	49292997	49974173
+chrX	50024173	52395914
+chrX	52445914	58582012
+chrX	61682012	76653692
+chrX	76703692	113517668
+chrX	113567668	115682290
+chrX	115732290	120013235
+chrX	120063235	143507324
+chrX	143557324	148906424
+chrX	148956424	149032062
+chrX	149082062	152277099
+chrX	152327099	155260560
+chr8	10000	7474649
+chr8	7524649	12091854
+chr8	12141854	43838887
+chr8	46838887	48130499
+chr8	48135599	86576451
+chr8	86726451	142766515
+chr8	142816515	145332588
+chr8	145432588	146304022
+chr9	10000	39663686
+chr9	39713686	39974796
+chr9	40024796	40233029
+chr9	40283029	40425834
+chr9	40475834	40940341
+chr9	40990341	41143214
+chr9	41193214	41365793
+chr9	41415793	42613955
+chr9	42663955	43213698
+chr9	43313698	43946569
+chr9	43996569	44676646
+chr9	44726646	44908293
+chr9	44958293	45250203
+chr9	45350203	45815521
+chr9	45865521	46216430
+chr9	46266430	46461039
+chr9	46561039	47060133
+chr9	47160133	47317679
+chr9	65467679	65918360
+chr9	65968360	66192215
+chr9	66242215	66404656
+chr9	66454656	66614195
+chr9	66664195	66863343
+chr9	66913343	67107834
+chr9	67207834	67366296
+chr9	67516296	67987998
+chr9	68137998	68514181
+chr9	68664181	68838946
+chr9	68988946	69278385
+chr9	69328385	70010542
+chr9	70060542	70218729
+chr9	70318729	70506535
+chr9	70556535	70735468
+chr9	70835468	92343416
+chr9	92443416	92528796
+chr9	92678796	133073060
+chr9	133223060	137041193
+chr9	137091193	139166997
+chr9	139216997	141153431
+chr10	60000	17974675
+chr10	18024675	38818835
+chr10	38868835	39154935
+chr10	42354935	42546687
+chr10	42596687	46426964
+chr10	46476964	47429169
+chr10	47529169	47792476
+chr10	47892476	48055707
+chr10	48105707	49095536
+chr10	49195536	51137410
+chr10	51187410	51398845
+chr10	51448845	125869472
+chr10	125919472	128616069
+chr10	128766069	133381404
+chr10	133431404	133677527
+chr10	133727527	135524747
+chr11	60000	1162759
+chr11	1212759	50783853
+chr11	51090853	51594205
+chr11	54694205	69089801
+chr11	69139801	69724695
+chr11	69774695	87688378
+chr11	87738378	96287584
+chr11	96437584	134946516
+chr12	60000	95739
+chr12	145739	7189876
+chr12	7239876	34856694
+chr12	37856694	109373470
+chr12	109423470	122530623
+chr12	122580623	132706992
+chr12	132806992	133841895
+chr13	19020000	86760324
+chr13	86910324	112353994
+chr13	112503994	114325993
+chr13	114425993	114639948
+chr13	114739948	115109878
+chr14	19000000	107289540
+chr15	20000000	20894633
+chr15	20935075	21398819
+chr15	21885000	22212114
+chr15	22262114	22596193
+chr15	22646193	23514853
+chr15	23564853	29159443
+chr15	29209443	82829645
+chr15	82879645	84984473
+chr15	85034473	102521392
+chr16	60000	8636921
+chr16	8686921	34023150
+chr16	34173150	35285801
+chr16	46385801	88389383
+chr16	88439383	90294753
+chr17	0	296626
+chr17	396626	21566608
+chr17	21666608	22263006
+chr17	25263006	34675848
+chr17	34725848	62410760
+chr17	62460760	77546461
+chr17	77596461	79709049
+chr17	79759049	81195210
+chr18	10000	15410898
+chr18	18510898	52059136
+chr18	52209136	72283353
+chr18	72333353	75721820
+chr18	75771820	78017248
+chr20	60000	26319569
+chr20	29419569	29653908
+chr20	29803908	34897085
+chr20	34947085	61091437
+chr20	61141437	61213369
+chr20	61263369	62965520
+chrY	10000	44821
+chrY	94821	181384
+chrY	231384	997557
+chrY	1047557	1084113
+chrY	1134113	1214234
+chrY	1264234	2018238
+chrY	2068238	8914955
+chrY	8964955	9241322
+chrY	9291322	10104553
+chrY	13104553	13143954
+chrY	13193954	13748578
+chrY	13798578	20143885
+chrY	20193885	22369679
+chrY	22419679	23901428
+chrY	23951428	28819361
+chrY	58819361	58917656
+chrY	58967656	59363566
+chr19	60000	7346004
+chr19	7396004	8687198
+chr19	8737198	20523415
+chr19	20573415	24631782
+chr19	27731782	59118983
+chr22	16050000	16697850
+chr22	16847850	20509431
+chr22	20609431	50364777
+chr22	50414777	51244566
+chr21	9411193	9595548
+chr21	9645548	9775437
+chr21	9825437	10034920
+chr21	10084920	10215976
+chr21	10365976	10647896
+chr21	10697896	11188129
+chr21	14338129	42955559
+chr21	43005559	44632664
+chr21	44682664	48119895
+chr6_ssto_hap7	0	861800
+chr6_ssto_hap7	941242	1727717
+chr6_ssto_hap7	1744408	1836632
+chr6_ssto_hap7	1838892	1983561
+chr6_ssto_hap7	1997824	2052735
+chr6_ssto_hap7	2087415	2101474
+chr6_ssto_hap7	2113875	2138491
+chr6_ssto_hap7	2303018	2411247
+chr6_ssto_hap7	2461193	3040184
+chr6_ssto_hap7	3054966	3093568
+chr6_ssto_hap7	3154699	3191196
+chr6_ssto_hap7	3214366	3368931
+chr6_ssto_hap7	3445987	4370564
+chr6_ssto_hap7	4420564	4580410
+chr6_ssto_hap7	4610218	4639806
+chr6_ssto_hap7	4661556	4723509
+chr6_ssto_hap7	4774367	4783798
+chr6_ssto_hap7	4836049	4928567
+chr6_mcf_hap5	0	135141
+chr6_mcf_hap5	138043	179429
+chr6_mcf_hap5	186757	229133
+chr6_mcf_hap5	282732	383988
+chr6_mcf_hap5	404415	491590
+chr6_mcf_hap5	560421	676599
+chr6_mcf_hap5	685588	994250
+chr6_mcf_hap5	1009296	1177759
+chr6_mcf_hap5	1292220	1562952
+chr6_mcf_hap5	1726699	1815769
+chr6_mcf_hap5	1838106	2091199
+chr6_mcf_hap5	2189158	2241824
+chr6_mcf_hap5	2251998	2300850
+chr6_mcf_hap5	2305647	2712451
+chr6_mcf_hap5	2811903	2947576
+chr6_mcf_hap5	2958999	3154498
+chr6_mcf_hap5	3188719	3329672
+chr6_mcf_hap5	3355524	3602941
+chr6_mcf_hap5	3659279	3899294
+chr6_mcf_hap5	3929849	4149579
+chr6_mcf_hap5	4149626	4284353
+chr6_mcf_hap5	4302274	4422172
+chr6_mcf_hap5	4594253	4833398
+chr6_cox_hap2	0	4795371
+chr6_mann_hap4	0	145852
+chr6_mann_hap4	184243	1388812
+chr6_mann_hap4	1438812	2047420
+chr6_mann_hap4	2056121	2252213
+chr6_mann_hap4	2277026	2312291
+chr6_mann_hap4	2324610	2517052
+chr6_mann_hap4	2540962	2772006
+chr6_mann_hap4	2810953	2921096
+chr6_mann_hap4	2929556	3062107
+chr6_mann_hap4	3151453	3174271
+chr6_mann_hap4	3227203	3261361
+chr6_mann_hap4	3374492	3412258
+chr6_mann_hap4	3450867	3553455
+chr6_mann_hap4	3557741	3895092
+chr6_mann_hap4	3926341	4236950