Blaz Zupan avatar Blaz Zupan committed 4d93222

no message

Comments (0)

Files changed (8)

+import math
+import Numeric
+import MA
+import MLab, LinearAlgebra
+
+import numarray as NA
+import numarray.ma as NM
+import numarray.linear_algebra as LA
+
+
+#####################################################################################
+## Strictly upper (lower) triangular matrix <-> 1D array
+#####################################################################################
+
+def triangularPut(m1d, upper=1, lower=0):
+    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.
+    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.
+    The lower triangular part (if requested) equals to the transposed upper triangular part.
+    If upper == lower == 1 a symetric matrix is returned.
+    """
+    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
+    m1d = MA.asarray(m1d)
+    assert MA.rank(m1d) == 1, "1D masked array expected"
+    m2dShape0 = math.ceil(math.sqrt(2*m1d.shape[0]))
+    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)/2, "the length of m1d does not correspond to n(n-1)/2"
+    if upper:
+        if lower:
+            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
+        else:
+            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
+    else:
+        if lower:
+            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
+        else:
+            mask = Numeric.ones((m2dShape0, m2dShape0))
+
+    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.typecode()))
+    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
+    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
+    MA.put(m2d, putIndices, m1d)
+    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
+    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
+    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
+
+
+def triangularGet(m2d, upper=1):
+    """Returns 1D masked array with elements from the upper (lower) triangular part of the given matrix.
+    For a symetric matrix triangularGet(m2d, 0) and triangularGet(m2d, 1) return elements in different order.
+    """
+    assert upper in [0,1], "upper: [0|1] expected"
+    m2d = MA.asarray(m2d)
+    assert MA.rank(m2d) == 2, "2D (masked) array expected"
+    if upper:
+        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i<j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
+    else:
+        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i>j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
+    return MA.take(MA.ravel(m2d), takeInd)
+
+
+#####################################################################################
+## Put 1D array on the diagonal of the given 2D array and returns a copy.
+#####################################################################################
+
+def diagonalPut(m1d, m2d):
+    """Puts the given 1D masked array into the diagonal of the given 2D masked array and returns a new copy of the 2D array.
+    """
+    m1d = MA.asarray(m1d)
+    m2d = MA.asarray(m2d)
+    assert MA.rank(m1d) == 1 and MA.rank(m2d) == 2, "1D and 2D masked array expected"
+    assert m1d.shape[0] == m2d.shape[0] == m2d.shape[1], "the shape of the given arrays does not match"
+    putIndices = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i==j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
+    m2dShape = m2d.shape
+    m2d = MA.ravel(m2d)
+    MA.put(m2d, putIndices, m1d)
+    return MA.reshape(m2d, m2dShape)
+    
+
+#####################################################################################
+#### linear interpolation of missing values by the given axis
+#####################################################################################
+##
+##def fillLinear1(ma, axis=0):
+##    """BUG: does not handle successive missing values
+##    """
+##    maflat = MA.array(MA.ravel(ma))
+##    mod = ma.shape[axis]
+##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))
+##    for idx in maskInd:
+##        if (idx % mod) == 0:  # the first one
+##            maflat[idx] = maflat[idx+1]
+##        elif (idx % mod) == mod-1:
+##            maflat[idx] = maflat[idx-1]
+##        else:
+##            maflat[idx] = (maflat[idx-1] + maflat[idx+1]) / 2.
+##    return MA.reshape(maflat, ma.shape)
+##
+##
+##def fillLinear2(ma, axis=0):
+##    """BUG: does not handle successive missing values
+##    """
+##    maflat = MA.array(MA.ravel(ma))
+##    mod = ma.shape[axis]
+##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))
+##    for idx in maskInd:
+##        idxFirst = idx-(idx%mod)
+##        idxNext = idxFirst+mod
+##        if MA.count(maflat[idxFirst:idxNext]) == 0:                                 # all elements are masked
+##            print "Warning: cannot do linear interpolation, all values are masked"
+##        elif idx == idxFirst:                                                        # the first element is masked
+##            maflat[idx] = maflat[idx+1:idxNext].compressed()[0]
+##        elif idx == idxNext-1:                                                      # the last element is masked
+##            maflat[idx] = maflat[idxFirst:idx].compressed()[-1]
+##        else:
+##            maflat[idx] = MA.average([MA.take(maflat,range(idx-1,idxFirst-1,-1)).compressed(), maflat[idx+1:idxNext].compressed(), MA.masked])
+####            maflat[idx] = (maflat[idxFirst:idx].compressed()[-1] + maflat[idx+1:idxNext].compressed()[0]) / 2.
+##    return MA.reshape(maflat, ma.shape)
+##
+##def fillLinear(ma, axis=0):
+##    """Linear interpolation of missing values;
+##    """
+##    pass
+##
+##
+##si = scipy.interpolate.interp1d(Numeric.array([[0,1,3,4],[0,1,3,4]]), Numeric.array([[0.1,1.1,3.2,4.4],[0,1,3,4]]))
+##si(2)
+
+###################################################################################
+## swapaxes for masked array
+###################################################################################
+
+def swapaxesMA(ma, axis1, axis2):
+    if axis1 < axis2:
+        m,M = axis1, axis2
+    elif axis1 > axis2:
+        m,M = axis2, axis1
+    elif 0 <= axis1 == axis2 < len(ma.shape):
+        return ma
+    else:
+        raise ValueError("Bad axis1 or axis2 argument to swapaxes.")
+    axList = range(m) + [M] + range(m+1,M) + [m] + range(M+1,len(ma.shape))
+    return MA.transpose(ma, axList)
+
+
+###################################################################################
+## compress MA to Numeric and return Numeric + indices of non-masked places
+###################################################################################
+
+def compressIndices(ma):
+    """Returns 1D compressed Numeric array and the indices of the non-masked places.
+    usage:  nu,ind = compressIndices(ma)
+            nu = Numeric.f_elementwise(nu)
+            ma = MA.put(ma, ind, nu)
+    """
+    ma = MA.asarray(ma)
+    nonMaskedInd = Numeric.compress(1-Numeric.ravel(MA.getmaskarray(ma)), Numeric.arange(Numeric.multiply.reduce(ma.shape)))
+    return MA.filled(ma.compressed()), nonMaskedInd
+
+
+###################################################################################
+## compact print of Numeric or MA
+###################################################################################
+
+def aprint(a,decimals=2):
+    """prints array / masked array of floats"""
+    if type(a) == Numeric.ArrayType:
+        print Numeric.around(a.astype(Numeric.Float0),decimals)
+    elif type(a) == MA.MaskedArray:
+        print MA.around(a.astype(MA.Float0),decimals)
+
+
+###################################################################################
+## Numeric -> numarray
+## MA -> numarray.ma
+###################################################################################
+
+def _nu2na(a):
+    """Numeric -> numarray"""
+    a = Numeric.asarray(a)
+    return NA.array(a)
+
+def _ma2nm(m):
+    """MA -> numarray.ma"""
+    m = MA.asarray(m)
+    return NM.array(m.raw_data(), mask=m.raw_mask())
+
+
+###################################################################################
+## numarray -> Numeric
+## numarray.ma -> MA
+###################################################################################
+
+def _na2nu(a):
+    """numarray -> Numeric"""
+    a = NA.asarray(a)
+    return Numeric.array(a)
+
+def _nm2ma(m):
+    """numarray.ma -> MA"""
+    m = NM.asarray(m)
+    return MA.array(m.raw_data(), mask=m.raw_mask())
+
+
+###################################################################################
+## min / max along the given axis
+## SLOW DUE TO SORT
+###################################################################################
+
+def minMA(m,axis=0):
+    """slow: remove sorting"""
+    m = MA.asarray(m, MA.Float)
+    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))
+    m = MA.transpose(m, transList)    # do not use swapaxes
+    return MA.sort(m, 0, fill_value=1e20)[0]
+    
+def maxMA(m,axis=0):
+    """slow: remove sorting"""
+    m = MA.asarray(m, MA.Float)
+    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))
+    m = MA.transpose(m, transList)    # do not use swapaxes
+    return MA.sort(m, 0, fill_value=-1e20)[-1]
+
+###################################################################################
+## median
+###################################################################################
+
+def median(m,axis=0):
+    m = Numeric.asarray(m)
+    return MLab.median(Numeric.transpose(m, [axis]+range(axis)+range(axis+1,Numeric.rank(m))))  # do not use swapaxes
+
+
+def medianMA(m,axis=0):
+    """Returns median of the given masked array along the given axis."""
+    return _nm2ma(_medianNM(_ma2nm(m),axis))
+
+
+def _medianNA(m,axis=0):
+    m = NA.asarray(m)
+    return LA.mlab.median(NA.transpose(m, [axis]+range(axis)+range(axis+1,m.rank)))   # do not use swapaxes
+
+def _medianNM(m,axis=0):
+    """Returns median of the given masked numarray along the given axis."""
+    m = NM.asarray(m, NM.Float)
+    if len(m.shape) == 0:
+        return m[0]
+    elif len(m.shape) == 1:
+        c = NM.count(m)
+        mIndSort = NM.argsort(m,fill_value=1e20)
+        return (m[mIndSort[(c-1)/2]] + m[mIndSort[c/2]]) / 2.
+    else:
+        # count the number of nonmasked indices along the given axis
+        c = NM.count(m, axis=axis)
+        # prepare indices for other axis (except for the given)
+        cind = NA.indices(NA.shape(c))
+        # indices of m sorted by increasing value at axis 0; masked values placed at the end;
+        # use _argsortSwapNM() to get the shape same as m
+        mtIndSort = _argsortSwapNM(m,axis,fill_value=1e20)
+        # get indices to address median elements from m
+        # tuple(takeInd1): all lists in the tuple must be of the same shape,
+        #   the result mtIndSort[tuple(takeInd1)] is also of that shape
+        takeInd1 = cind.tolist()
+        takeInd1.insert(axis,((c-1)/2).tolist())
+        medInd1 = mtIndSort[tuple(takeInd1)]
+        takeInd2 = cind.tolist()
+        takeInd2.insert(axis,(c/2).tolist())
+        medInd2 = mtIndSort[tuple(takeInd2)]
+        # get both median elements; if c[i] odd: med1[i] == med2[i]
+        takeMed1 = cind.tolist()
+        takeMed1.insert(axis,medInd1.tolist())
+        med1 = m[tuple(takeMed1)]
+        takeMed2 = cind.tolist()
+        takeMed2.insert(axis,medInd2.tolist())
+        med2 = m[tuple(takeMed2)]
+##        if __name__=="__main__":
+##            print "m\n",m.filled(-1)
+##            print "c", c
+##            print "[(c-1)/2,c/2]", [(c-1)/2,c/2]
+##            print "cind\n",cind
+##            print "mtIndSort\n",mtIndSort
+##            print "medInd1\n",medInd1
+##            print "medInd2\n",medInd2
+##            print "med1\n",med1
+##            print "med2\n",med2
+        return (med1+med2)/2.
+
+def _argsortSwapNM(m,axis=0,fill_value=None):
+    """Returns the indices along the given axis sorted by increasing value of the given masked numarray
+    e.g. m=[[.5,.2],[.3,.6]], _argsortSwapNM(m,0)->[[1,0],[0,1]]"""
+    m = NM.asarray(m)
+    return NA.swapaxes(NA.asarray(NM.argsort(m, axis, fill_value)), -1, axis)
+
+
+###################################################################################
+## std. deviation MA
+###################################################################################
+
+def stdMA(m,axis=0):
+    m = MA.asarray(m)
+    m = MA.transpose(m, [axis] + range(0,axis) + range(axis+1,MA.rank(m)))  # do not use swapaxes, use (axis, 0...axis-1, axis+1...rank)
+    return MA.sqrt(MA.add.reduce((MA.subtract(m,MA.average(m,0)))**2,0)/(m.shape[0]-1))
+
+
+###################################################################################
+## median absolute deviation
+###################################################################################
+
+def mad(m,axis=0):
+    """Returns Median Absolute Deviation of the given array along the given axis.
+    """
+    m = Numeric.asarray(m)
+    mx = Numeric.asarray(median(m,axis),Numeric.Float)
+    xt = Numeric.transpose(m, [axis]+range(axis)+range(axis+1,Numeric.rank(m))) # do not use swapaxes
+    return MLab.median(Numeric.absolute(xt-mx))
+
+def madMA(m,axis=0):
+    """Returns Median Absolute Deviation of the given masked array along the given axis.
+    """
+    m = MA.asarray(m)
+    mx = MA.asarray(medianMA(m,axis),MA.Float)
+    xt = MA.transpose(m, [axis]+range(axis)+range(axis+1,MA.rank(m)))   # do not use swapaxes
+    return medianMA(MA.absolute(xt-mx))
+
+def _madNA(m,axis=0):
+    """Returns Median Absolute Deviation of the given numarray along the given axis.
+    """
+    m = NA.asarray(m)
+    mx = NA.asarray(_medianNA(m,axis),NA.Float)
+    xt = NA.transpose(m, [axis]+range(axis)+range(axis+1,m.rank))
+    return LA.mlab.median(NA.absolute(xt-mx))
+
+def _madNM(m,axis=0):
+    """Returns Median Absolute Deviation of the given masked numarray along the given axis.
+    """
+    m = NM.asarray(m, NM.Float)
+    mx = _medianNM(m, axis)
+    # BUG WARNING: transpose returns ObjectArray, use NM.array(..., NM.Float)
+    xt = NM.array(NM.transpose(m, [axis]+range(axis)+range(axis+1,NM.rank(m))), NM.Float)
+    return _medianNM(NM.absolute(xt-mx),0)
+
+
+###################################################################################
+## binomial factors
+###################################################################################
+
+def binomial(n,r):
+    """Returns array of elementwise binomial factors: n[i]! / r[i]!(n[i]-r[i])!
+    type(n) & type(r) == int | Numeric.ArrayType of int
+    compute in floats to avoid overflow; max. n = 1029
+    """
+##    n = Numeric.asarray(n).astype(Numeric.Int)
+##    r = Numeric.asarray(r).astype(Numeric.Int)
+    n = Numeric.asarray(n)
+    r = Numeric.asarray(r)
+    assert Numeric.shape(n) == Numeric.shape(r), "binomial(n,r): n and r of different shape"
+    bin = Numeric.zeros(Numeric.shape(n), Numeric.Float)
+    for idx in xrange(len(n.flat)):
+        ni1 = n.flat[idx] + 1
+        b = Numeric.ones((ni1), Numeric.Float)
+        for i in xrange(1,ni1):
+            for j in xrange(i-1,0,-1):
+                b[j] += b[j-1]
+        bin.flat[idx] = b[r.flat[idx]]
+    return bin
+
+def __binomial_single(n,r):
+    """
+    DEPRICATED, see binomial(n,r)
+    returns n! / r!(n-r)!
+    """
+    b = Numeric.ones((n+1), Numeric.Float)
+    for i in xrange(1,n+1):
+        for j in xrange(i-1,0,-1):
+            b[j] += b[j-1]
+    return b[r]
+
+
+
+
+
+
+if __name__ == "__main__":
+
+    m1 = NM.array([4,3,6,8,54,1], mask=[1,1,0,1,1,1])
+    m2a = NM.array([[1,2,3],[4,5,6],[7,8,9]], NM.Float, mask=[[0,1,0],[0,1,0],[0,0,1]])
+    n2 = NA.array([[3,1,2],[4,3,6],[1,4,7],[4,3,6]])
+    n3 = NA.array([n2,n2])
+    m2 = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3]], NM.Int, mask=[[0,1,0,0],[0,1,0,0],[0,0,0,1],[0,0,0,0],[0,0,0,1],[1,1,1,1]])
+    m2s = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3],[6,4,2,3],[6,4,2,3],[6,4,2,3],[6,4,2,3]], NM.Int, mask=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[1,1,1,1],[1,1,1,1],[1,1,1,1],[1,1,1,1]])
+    m2u = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3]], NM.Int, mask=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]])
+    m3 = NM.array([m2.raw_data(),10*m2.raw_data()],NM.Int, mask=[m2.raw_mask(),m2.raw_mask()], fill_value=-1)
+
+##    def test3D(m3):
+##        print "median\n",_medianNM(m3,0).filled(-1)
+##        print "......."
+##        for ii in range(m3.shape[0]):
+##            print m3[ii,:,:].filled(-1)
+##            print
+##        print "median\n",_medianNM(m3,1).filled(-1)
+##        print "......."
+##        for ii in range(m3.shape[1]):
+##            print m3[:,ii,:].filled(-1)
+##            print
+##        print "median\n",_medianNM(m3,2).filled(-1)
+##        print "......."
+##        for ii in range(m3.shape[2]):
+##            print m3[:,:,ii].filled(-1)
+##            print
+
+    # median    
+##    m1med = _medianNM(m1)
+##    m2med = _medianNM(m2,1)
+##    m3med = _medianNM(m3,2)
+
+    # MAD: must be equal!
+##    mad(m2u)
+##    mad(m2s)
+
+    # compare Numeric, MA, NA, NM
+##    import RandomArray
+##    lst1 = (RandomArray.random((7)) * 10).tolist()
+##    lst2 = (RandomArray.random((4,7)) * 10).tolist()
+##    lst3 = (RandomArray.random((3,4,7)) * 10).tolist()
+##
+##    def getAllArrays(lst1):
+##        # Numeric, MA
+##        nu1 = Numeric.array(lst1)
+##        ma1 = MA.array(lst1)
+##        ma1m = MA.masked_less(lst1, 5)
+##        # numarray, ma
+##        na1 = NA.array(lst1)
+##        nm1 = NM.array(lst1)
+##        nm1m = NM.masked_less(lst1, 5)
+##        return nu1, ma1, ma1m, na1, nm1, nm1m
+##
+##    nu1, ma1, ma1m, na1, nm1, nm1m = getAllArrays(lst1)
+##    nu2, ma2, ma2m, na2, nm2, nm2m = getAllArrays(lst2)
+##    nu3, ma3, ma3m, na3, nm3, nm3m = getAllArrays(lst3)
+
+    # linear interpolation of missing values (fillLinear)
+##    m1 = MA.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]], mask = [[1,0,0,0],[0,1,0,0],[0,0,0,1]])
+##    m2 = MA.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]], mask = [[1,1,0,0],[0,1,1,0],[0,0,1,1]])
+##    m1l = fillLinear(m1,1)
+##    m2l = fillLinear(m2,1)
+
+    # triangular matrix <-> 1D masked array
+##    m1 = MA.arange(0, 6)
+##    m2u = triangularPut(m1,1,0)
+##    m2l = triangularPut(m1,0,1)
+##    m2ul = triangularPut(m1,1,1)
+##    m2 = triangularPut(m1,0,0)
+##
+##    m2uld = diagonalPut(Numeric.arange(4), m2ul)
+##
+##    m1u = triangularGet(m2ul)
+##    m1l = triangularGet(m2ul, 0)
+    
+"""
+<name>ANOVA on Chip Data</name>
+<description>ANOVA on chip data (strains, replicas).</description>
+<category>Genomics</category>
+<icon>icons/ChipANOVA.png</icon>
+<priority>1120</priority>
+"""
+
+from OWWidget import *
+import OWGUI
+from OWChipDataFiles import ChipData
+import chipstat
+
+class ANOVAResults(orange.Orange):
+    pass
+
+class GeneSelection(orange.Orange):
+    pass
+
+class OWChipANOVA(OWWidget):
+    settingsList  = ['p1', 'p2', 'p3', 'filter1', 'filter2', 'filter3', 'commitOnChange']
+
+    def __init__(self, parent=None, name='ANOVA on Chip Data'):
+        OWWidget.__init__(self, parent, name, "ANOVA on chip data (strains, replicas).")
+        self.callbackDeposit = []
+
+        self.inputs = [("Structured Chip Data", ChipData, self.chipdata, 1)]
+        self.outputs = [("Selected Data", ChipData), ("Other Data", ChipData), ("Results of ANOVA", ANOVAResults), ("Gene Selection", GeneSelection)]
+
+        self.commitOnChange = 0
+        self.chipdata = None
+        self.p1, self.p2, self.p3 = (2, 2, 2)
+        self.filter1, self.filter2, self.filter3 = (1, 0, 0)
+        # Settings
+        self.loadSettings()
+        self.ps = None
+        self.selection = [None]*3
+
+        # GUI
+        # info
+        box = QVGroupBox("Info", self.controlArea)
+        self.infoa = QLabel('No data on input.', box)
+        self.infob = QLabel('', box)
+        OWGUI.separator(self.controlArea)
+
+        # gene selection
+        self.selectionBox = QVGroupBox("Gene Selection", self.controlArea)
+        self.factors = [('First factor (time)', 'p1', 'filter1'),
+                   ('Second factor (strain)', 'p2', 'filter2'),
+                   ('Interaction factor (time*strain)', 'p3', 'filter3')]
+        self.pvals = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5]
+
+        self.numgenes = []
+        for i in range(3):
+            OWGUI.checkBox(self.selectionBox, self, self.factors[i][2], self.factors[i][0], callback=self.finalselection)
+            hbox = QHBox(self.selectionBox)
+            lbl = QLabel('', hbox)
+            lbl.setFixedSize(20, lbl.sizeHint().height())
+            lbl = QLabel('p <', hbox)
+            lbl.setFixedSize(25, lbl.sizeHint().height())
+            OWGUI.comboBox(hbox, self, self.factors[i][1], items = self.pvals, callback=lambda x=i: self.geneselection(x))
+            self.numgenes.append(QLabel('  (0 genes)', hbox))
+        self.selectionLbl = QLabel('Total of 0 genes match criteria', self.selectionBox)
+        self.selectionBox.setDisabled(1)
+
+        # output
+        box = QVGroupBox("Output", self.controlArea)
+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')
+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)
+
+        self.resize(200,100)
+        
+    def chipdata(self, data):
+        if data:
+            self.data = data
+            self.selectionBox.setEnabled(1)
+            nfiles = 0
+            for (n, d) in data:
+                nfiles += len(d)
+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))
+            d = data[0][1][0]
+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))
+
+            self.analysis()
+        else:
+            self.send("Results of ANOVA", None)
+        self.commitBtn.setEnabled(data <> None)
+
+    def geneselection(self, indx):            
+        margin = self.pvals[getattr(self, self.factors[indx][1])]
+        p = [x[indx] for x in self.ps]
+        n = len(filter(lambda x: x<margin, p))
+        self.numgenes[indx].setText('  (%d %s)' % (n, ['genes', 'gene'][n==1]))
+        self.selection[indx] = map(lambda x: x<margin, p)
+        for x in self.selection:
+            if x==None: return
+        self.finalselection()
+
+    def finalselection(self):
+        if not self.ps:
+            return
+        self.match = [0]*len(self.ps)
+        for indx in range(3):
+            if getattr(self, self.factors[indx][2]) and self.selection[indx]:
+                self.match = map(lambda a,b: a or b, self.match, self.selection[indx])
+        n = self.match.count(1)
+        self.selectionLbl.setText('Total of %d %s match criteria' % (n, ['genes', 'gene'][n==1]))
+        if self.commitOnChange:
+            self.senddata()
+
+    def analysis(self):
+        self.progressBarInit()
+        pbStep = 90./len(self.data[0][1][0])
+        self.ps, anova_results = chipstat.anova_on_genes(self.data, callback=lambda: self.progressBarAdvance(pbStep))
+        self.send("Results of ANOVA", anova_results)
+        #print 'AAA', anova_results.classNames
+        self.progressBarSet(90)
+        self.selection = [None]*3
+        for indx in range(3):
+            self.geneselection(indx)
+        self.progressBarFinished()
+
+    def senddata(self):
+        self.send("Gene Selection", self.match)
+        n = self.match.count(1)
+        if n==len(self.data[0][1][0]): # all genes match
+            self.send("Selected Data", self.data)
+            self.send("Other Data", None)
+        elif n==0:
+            self.send("Selected Data", None)
+            self.send("Other Data", self.data)
+        else:
+            print 'processing'
+            P = []; N = []
+            for (strainname, data) in self.data:
+                dataP = [d.select(self.match) for d in data]
+                dataN = [d.select(self.match, negate=1) for d in data]
+                for i in range(len(data)):
+                    dataP[i].name = dataN[i].name = data[i].name
+                P.append((strainname, dataP))
+                N.append((strainname, dataN))
+            self.send("Selected Data", P)
+            self.send("Other Data", N)
+
+if __name__=="__main__":
+    a=QApplication(sys.argv)
+    ow=OWChipANOVA()
+    a.setMainWidget(ow)
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()

OWChipDataFiles.py

+"""
+<name>Chip Data Files</name>
+<description>Loads microarray data sets from designated directory.</description>
+<category>Genomics</category>
+<icon>icons/ChipDataFiles.png</icon>
+<priority>1100</priority>
+"""
+
+from OWWidget import *
+import OWGUI
+import os, os.path, orange
+
+class ChipData(orange.Orange):
+    pass
+
+class Mico(orange.Orange):
+    pass
+
+class myCheckListItem(QCheckListItem):
+    def __init__(self, *args):
+        self.callback = None
+        QCheckListItem.__init__(self, *args)
+        self.setOn(1)
+
+    def stateChange(self, b):
+        if self.callback:
+            self.callback()
+
+class OWChipDataFiles(OWWidget):
+    settingsList  = ["recentDirs","selectedDirName", "applyOnChange"]
+
+    def __init__(self, parent=None, name='Chip Data Files'):
+        OWWidget.__init__(self, parent, name, "ChipData loads microarray data sets from designated directory.")
+
+        self.callbackDeposit = []
+
+        self.inputs = []
+        self.outputs = [("Structured Chip Data", ChipData)]
+
+        self.chipdata = None; self.datasets = None
+        # Settings
+        self.recentDirs=[]
+        self.selectedDirName = "None"
+        self.applyOnChange = 0
+        self.loadSettings()
+        print 'ddd (1)', self.recentDirs
+
+        # GUI
+        # CONTROLS
+        box = QHGroupBox("Directory", self.controlArea)
+        self.dircombo=QComboBox(box)
+        self.dircombo.setMinimumWidth(250)
+        button = OWGUI.button(box, self, '...', callback = self.browseDirectory, disabled=0)
+        button.setMaximumWidth(25)
+
+        # info
+        box = QVGroupBox("Info", self.controlArea)
+        self.infoa = QLabel('No data loaded.', box)
+        self.infob = QLabel('', box)
+            
+        # Output
+        out = QVGroupBox("Output", self.controlArea)
+        OWGUI.checkBox(out, self, 'applyOnChange', 'Commit data on selection change')
+        self.commitBtn = OWGUI.button(out, self, "Commit", callback=self.sendData, disabled=1)
+
+        # connecting GUI to code
+        self.connect(self.dircombo,SIGNAL('activated(int)'),self.selectDir)
+        self.resize(500,200)
+            
+        # LIST VIEW
+        self.layout=QVBoxLayout(self.mainArea)
+        self.splitter = QSplitter(QSplitter.Vertical, self.mainArea)
+        self.layout.add(self.splitter)
+
+        self.tree = QListView(self.splitter)
+        self.tree.setAllColumnsShowFocus(1)
+        self.tree.addColumn('Directory/Data File')
+        self.tree.setColumnWidth(0, 300)
+        self.tree.setColumnWidthMode(0, QListView.Manual)
+        self.tree.setColumnAlignment(0, QListView.AlignLeft)
+            
+        self.recentDirs=filter(os.path.exists,self.recentDirs)
+        self.setDirlist()
+        self.dircombo.setCurrentItem(0)
+
+        if self.recentDirs!=[]:
+            self.loadData(self.recentDirs[0])
+        print 'ddd (2)', self.recentDirs
+
+        self.commitBtn = OWGUI.button(box, self, "Test", callback=self.test)
+            
+    def test(self):
+        print 'ddd (tst)', self.recentDirs
+        print 'ddd (tst) type', type(self.recentDirs), type(self.recentDirs[0])
+        
+    def setFileTree(self):
+        self.tree.clear()
+        self.listitems = []
+        for d in self.chipdata:
+            (dirname, files) = d
+            diritem = myCheckListItem(self.tree, dirname, QCheckListItem.CheckBox)
+            diritem.callback = self.selectionChanged
+            self.listitems.append(diritem)
+            diritem.setOpen(1)
+            diritem.name = dirname
+            for f in files:
+                item = myCheckListItem(diritem, f.name, QCheckListItem.CheckBox)
+                item.callback = self.selectionChanged
+                self.listitems.append(item)
+                item.data = f
+                
+    def selectionChanged(self):
+        if self.applyOnChange and self.okToCommit:
+            self.sendData()
+
+    # checks which data has been selected, builds a chip data structure, and sends it out
+    def sendData(self):
+        data = []
+        dir = self.tree.firstChild()
+        while dir:
+            if dir.isOn():
+                files = []
+                f = dir.firstChild()
+                while f:
+                    if f.isOn():
+                        files.append(f.data)
+                    f = f.nextSibling()
+                if len(files):
+                    data.append((dir.name, files))
+            dir = dir.nextSibling()
+        self.send("Structured Chip Data", data)
+
+    # Loads the chip data from a root directory, sends the data to the output channels
+    def loadData(self, root):
+        self.okToCommit = 0
+        if root == "(none)":
+            self.send("Structured Chip Data", None)
+        chipdata = [] # structured [(dirname0, [d00, d01, ...]), ...]
+        datasets = []  # flat list containing all the data sets
+        dirs = os.listdir(root)
+        for d in dirs:
+            dirname = root+'\\'+d
+            if os.path.isdir(dirname):
+                dirdata = []   
+                files  = os.listdir(dirname)
+                for f in files:
+                    name, ext = os.path.splitext(f)
+                    if ext in ['.tab', '.txt', '.data']:
+                        try:
+                            data = None
+                            data = orange.ExampleTable(dirname+'\\'+f)
+                            data.name = name
+                            dirdata.append(data)
+                        except orange.KernelException:
+                            print 'eee Exception, file not in appropriate format'
+                if len(dirdata):
+                    chipdata.append((os.path.split(dirname)[1], dirdata))
+                    datasets = datasets + dirdata
+
+        self.commitBtn.setEnabled(len(chipdata))
+        if len(chipdata):
+            self.chipdata = chipdata
+            self.datasets = datasets
+
+##            self.addDirToList(root)
+##            self.selectedDirName = root
+
+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(self.chipdata), len(self.datasets)))
+            d = self.datasets[0]
+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))
+            
+            self.setFileTree()
+            self.okToCommit = 1
+            self.sendData()
+
+    # displays a file dialog and selects a directory
+    def browseDirectory(self):
+        if len(self.recentDirs):
+            startdir=os.path.split(self.recentDirs[0][:-1])[0]
+        else:
+            startdir ="."
+        dirname=str(QFileDialog.getExistingDirectory(startdir, None, '', 'Microarray Data Directory', 1))
+        if len(dirname):
+            self.loadData(str(dirname))
+            self.addDirToList(dirname) # XXX do this only if loadData successfull
+
+    def setDirlist(self):
+        self.dircombo.clear()
+        if len(self.recentDirs):
+            for dir in self.recentDirs:
+                print 'DDD', dir
+                (upperdir,dirname)=os.path.split(dir[:-1]) #:-1 removes the trailing '\'
+                print '---'
+                #leave out the path
+                self.dircombo.insertItem(dirname)
+        else:
+            self.dircombo.insertItem("(none)")
+        self.dircombo.adjustSize() #doesn't work properly :(
+
+    def addDirToList(self, dir):
+        # Add a directory to the start of the file list. 
+        # If it exists, move it to the start of the list
+        print 'ddd (4)', self.recentDirs
+        if dir in self.recentDirs:
+            self.recentDirs.remove(dir)
+        self.recentDirs.insert(0, str(dir))
+        self.setDirlist()
+        print 'ddd (5)', self.recentDirs
+        self.selectedDirName = dir
+
+    # called when user makes a selection from the drop-down menu
+    def selectDir(self, n):
+        if self.recentDirs:
+            self.loadData(self.recentDirs[n])
+            self.addDirToList(self.recentDirs[n])
+        else:
+            self.loadData("(none)")
+
+if __name__=="__main__":
+    a=QApplication(sys.argv)
+    ow=OWChipDataFiles()
+    a.setMainWidget(ow)
+
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()
+    print 'ddd', ow.recentDirs

OWChipPostHocANOVA.py

+"""
+<name>Post Hoc ANOVA on Chip Data</name>
+<description>Post Hoc ANOVA on chip data (strains, replicas).</description>
+<category>Genomics</category>
+<icon>icons/ChipPostHocANOVA.png</icon>
+<priority>1130</priority>
+"""
+
+from OWWidget import *
+import OWGUI
+from OWChipDataFiles import ChipData
+from OWChipANOVA import ANOVAResults, GeneSelection
+import chipstat
+
+class OWChipPostHocANOVA(OWWidget):
+    settingsList  = ['commitOnChange', 'f1', 'f2', 'p1', 'p2', 'p3', 'filter1', 'filter2', 'filter3']
+
+    def __init__(self, parent=None, name='ANOVA on Chip Data'):
+        OWWidget.__init__(self, parent, name, "ANOVA on chip data (strains, replicas).")
+        self.callbackDeposit = []
+
+        self.inputs = [("Results of ANOVA", ANOVAResults, self.anovaresults, 1)]
+        self.outputs = [("PostHoc Gene Selection", GeneSelection)]
+
+        self.commitOnChange = 0
+        self.anova = None
+
+        self.p1, self.p2, self.p3 = (2, 2, 2)
+        self.filter1, self.filter2, self.filter3 = (1, 0, 0)
+        self.f1 = None; self.f2 = None
+        # Settings
+        self.loadSettings()
+        self.ps = None
+        self.selection = [None]*3
+
+        # GUI
+        # info
+        box = QVGroupBox("Info", self.controlArea)
+        self.infoa = QLabel('No data on input.', box)
+        OWGUI.separator(self.controlArea)
+
+        # factors
+        box = QVGroupBox("Factors", self.controlArea)
+        self.f1Combo = OWGUI.comboBox(box, self, 'f1', sendSelectedValue=1, callback=[self.setFactorCombos, self.analysis], label="First:", labelWidth=50, orientation="horizontal")
+        self.f1Combo.setDisabled(1)
+        self.f2Combo = OWGUI.comboBox(box, self, 'f2', sendSelectedValue=1, callback=[self.setFactorCombos, self.analysis], label="Second:", labelWidth=50, orientation="horizontal")
+        self.f2Combo.setDisabled(1)
+
+        # gene selection
+        self.selectionBox = QVGroupBox("Gene Selection", self.controlArea)
+        self.factors = [('First factor (time)', 'p1', 'filter1'),
+                   ('Second factor (strain)', 'p2', 'filter2'),
+                   ('Interaction factor (time*strain)', 'p3', 'filter3')]
+        self.pvals = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5]
+
+        self.numgenes = []
+        for i in range(3):
+            OWGUI.checkBox(self.selectionBox, self, self.factors[i][2], self.factors[i][0], callback=self.finalselection)
+            hbox = QHBox(self.selectionBox)
+            lbl = QLabel('', hbox)
+            lbl.setFixedSize(20, lbl.sizeHint().height())
+            lbl = QLabel('p <', hbox)
+            lbl.setFixedSize(25, lbl.sizeHint().height())
+            OWGUI.comboBox(hbox, self, self.factors[i][1], items = self.pvals, callback=lambda x=i: self.geneselection(x))
+            self.numgenes.append(QLabel('  (0 genes)', hbox))
+        self.selectionLbl = QLabel('Total of 0 genes selected', self.selectionBox)
+        self.selectionBox.setDisabled(1)
+
+        # output
+        box = QVGroupBox("Output", self.controlArea)
+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')
+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)
+
+        self.resize(200,100)
+
+    def setFactorCombos(self):
+
+        def setOneCombo(combo, exclude):
+            combo.clear()
+            labels = [x for x in self.anova.classNames]
+            if exclude in self.anova.classNames:
+                labels.remove(exclude)
+            for l in labels:
+                combo.insertItem(l)
+
+        items = self.anova.classNames
+        if not self.f1 in items:
+            self.f1 = items[items[0]==self.f2]
+        if not self.f2 in items:
+            self.f2 = items[items[0]==self.f2]
+
+        setOneCombo(self.f1Combo, self.f2)
+        setOneCombo(self.f2Combo, self.f1)
+        self.f1 = self.f1 # this looks stupid, but it's not:
+        self.f2 = self.f2 # it sets the right item to the combo boxes
+
+    def anovaresults(self, anova):
+        self.anova = anova
+        self.selectionBox.setEnabled(anova <> None)
+        self.commitBtn.setEnabled(anova <> None)
+        if anova:
+            self.setFactorCombos()
+            self.f1Combo.setEnabled(1)
+            self.f2Combo.setEnabled(1)
+            self.analysis(commit=1)
+            self.infoa.setText('ANOVA results on %d strains' % len(self.anova.classNames))
+        else:
+            self.send("PostHoc Gene Selection", None)
+            self.f1Combo.clear()
+            self.f1Combo.setDisabled(1)
+        
+
+    def geneselection(self, indx, commit=0):            
+        margin = self.pvals[getattr(self, self.factors[indx][1])]
+        p = [x[indx] for x in self.ps]
+        n = len(filter(lambda x: x<margin, p))
+        self.numgenes[indx].setText('  (%d %s)' % (n, ['genes', 'gene'][n==1]))
+        self.selection[indx] = map(lambda x: x<margin, p)
+        for x in self.selection:
+            if x==None: return
+        self.finalselection(commit)
+
+    def finalselection(self, commit=0):
+        if not self.ps:
+            return
+        self.match = [0]*len(self.ps)
+        for indx in range(3):
+            if getattr(self, self.factors[indx][2]) and self.selection[indx]:
+                self.match = map(lambda a,b: a or b, self.match, self.selection[indx])
+        n = self.match.count(1)
+        print self.match
+        self.selectionLbl.setText('Total of %d %s match criteria' % (n, ['genes', 'gene'][n==1]))
+        if self.commitOnChange or commit:
+            self.senddata()
+
+    def analysis(self, commit=0):
+        self.progressBarInit()
+        if not self.anova:
+            return
+        pbStep = 90./len(self.anova.anovaList)
+        self.ps = chipstat.posthoc_anova_on_genes(self.f1, self.f2, self.anova, callback=lambda: self.progressBarAdvance(pbStep))
+        self.progressBarSet(90)
+        self.selection = [None]*3
+        for indx in range(3):
+            self.geneselection(indx, commit=1)
+        self.progressBarFinished()
+
+    def senddata(self):
+        self.send("PostHoc Gene Selection", self.match)
+
+if __name__=="__main__":
+    a=QApplication(sys.argv)
+    ow=OWChipPostHocANOVA()
+    a.setMainWidget(ow)
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()
 from OWWidget import *
 from OWOptions import *
 from qwt import *
+from OWChipDataFiles import ChipData
+
+from OWChipANOVA import ANOVAResults
 
 ##############################################################################
 # parameters that determine the canvas layout
         self.callbackDeposit = [] # deposit for OWGUI callback functions
         OWWidget.__init__(self, parent, name, 'Microarray Heat Map', FALSE, FALSE) 
         
-        self.inputs = [("Examples", ExampleTable, self.dataset, 0)]
+        self.inputs = [("Examples", ExampleTable, self.dataset, 0), ("Structured Chip Data", ChipData, self.chipdata, 1)]
         self.outputs = [("Examples", ExampleTable), ("Classified Examples", ExampleTableWithClass)]
 
         #set default settings
         self.LegendOnTop = 1           # legend stripe on top (bottom)?
         self.LegendOnBottom = 1
         self.ShowGroupLabel = 1        # show class names in case of classified data?
-        self.ShowAverageStripe = 1     # show the stripe with the evarage
+        self.ShowAverageStripe = 0     # show the stripe with the evarage
         self.MaintainArrayHeight = 1   # adjust cell height while changing the merge factor
         self.BShowballoon = 1          # balloon help
         self.ShowGeneAnnotations = 1   # show annotations for genes
         box = QVButtonGroup("Annotation && Legends", tab)
         OWGUI.checkBox(box, self, 'LegendOnTop', 'Show legend', callback=self.drawHeatMap)
         OWGUI.checkBox(box, self, 'ShowAverageStripe', 'Stripes with averages', callback=self.drawHeatMap)
-        OWGUI.checkBox(box, self, 'ShowGeneAnnotations', 'Gene annotations', callback=self.drawHeatMap)
+        self.geneAnnotationsCB = OWGUI.checkBox(box, self, 'ShowGeneAnnotations', 'Gene annotations', callback=self.drawHeatMap)
         
         self.annotationCombo = OWGUI.comboBox(box, self, "BAnnotationIndx", items=[], callback=lambda x='BAnnotationVar', y='BAnnotationIndx': self.setMetaID(x, y))
 
         OWGUI.checkBox(box, self, 'BShowballoon', "Show balloon", callback=lambda: self.balloonInfoBox.setDisabled(not self.BShowballoon))
         box = QVButtonGroup("Balloon Info", tab)
         OWGUI.checkBox(box, self, 'BShowColumnID', "Column ID")
-        OWGUI.checkBox(box, self, 'BShowSpotIndex', "Spot Index", callback=lambda: self.spotCombo.setDisabled(not self.BShowSpotIndex))
+        self.spotIndxCB = OWGUI.checkBox(box, self, 'BShowSpotIndex', "Spot Index", callback=lambda: self.spotCombo.setDisabled(not self.BShowSpotIndex))
         self.spotCombo = OWGUI.comboBox(box, self, "BSpotIndx", items=[], callback=lambda x='BSpotVar', y='BSpotIndx': self.setMetaID(x, y))
         OWGUI.checkBox(box, self, 'BShowGeneExpression', "Gene expression")
         OWGUI.checkBox(box, self, 'BShowAnnotation', "Annotation")
         
     # any time the data changes, the two combo boxes showing meta attributes
     # have to be adjusted
-    def setMetaCombo(self, cb, value, enabled=1, default=None): 
+    def setMetaCombo(self, cb, value, enabled=1, default=None):
         cb.clear()
         if len(self.meta)==0:
             cb.setDisabled(True)
-            return (None, None, None)
+            self.spotIndxCB.setDisabled(1); self.geneAnnotationsCB.setDisabled(1)
+            return (None, None)
         cb.setDisabled(not enabled)
+        self.spotIndxCB.setEnabled(1); self.geneAnnotationsCB.setEnabled(1)
         for m in self.meta:
             cb.insertItem(m)
         
         if not (value in self.meta):
-            value = default
+            if default in self.meta:
+                value = default
+            else:
+                value = None
 
         if value in self.meta:
             cb.setCurrentItem(self.meta.index(value))
 
     def setMetaCombos(self):
         self.meta = [m.name for m in self.data[0].domain.getmetas().values()]
-        (self.BSpotVar, self.BSpotIndx) = self.setMetaCombo(self.spotCombo, self.BSpotVar, enabled=self.BShowSpotIndex, default='RMI')
-        (self.BAnnotationVar, self.BAnnotationIndx) = self.setMetaCombo(self.annotationCombo, self.BAnnotationVar, enabled=self.BShowAnnotation, default='annotation')
+        self.BSpotVar, self.BSpotIndx = self.setMetaCombo(self.spotCombo, self.BSpotVar, enabled=self.BShowSpotIndex, default='RMI')
+        self.BAnnotationVar, self.BAnnotationIndx = self.setMetaCombo(self.annotationCombo, self.BAnnotationVar, enabled=self.BShowAnnotation, default='xannotation')
 
     ##########################################################################
     # handling of input/output signals
 
     def dataset(self, data, id):
+        dataIDs = [d.id for d in self.data]
         if not data:
-            # should remove the data where necessary
-            return
-        # check if the same length
-        if data.domain.classVar:
-            domain = self.checkDomain(data)
-            if domain:
-                data = orange.ExampleTable(domain, data)
-##                print 'changed!'
-##        print 'len=', len(data)
-##        if data.domain.classVar:
-##            print 'class', len(data.domain.classVar.values)
-##        else:
-##            print 'no class'
-        data.setattr("id", id)
-        if len(self.data) and (id in [d.id for d in self.data]):
-            for i in range(len(self.data)):
-                if id==self.data[i].id:
-                    self.data[i] = data
-                    self.fileLB.changeItem(data.name, i)
-                    break
+            if id in dataIDs:
+                del self.data[dataIDs.index(id)]
         else:
-            self.data.append(data)
-            self.fileLB.insertItem(data.name)
+            # check if the same length
+            if data.domain.classVar:
+                domain = self.checkDomain(data)
+                if domain:
+                    data = orange.ExampleTable(domain, data)
+            data.setattr("id", id)
+            if len(self.data) and (id in dataIDs):
+                for i in range(len(self.data)):
+                    if id==self.data[i].id:
+                        self.data[i] = data
+                        self.fileLB.changeItem(data.name, i)
+                        break
+            else:
+                self.data.append(data)
+                self.fileLB.insertItem(data.name)
 
-        if (self.data <> None and len(self.data) > 1):
-            self.tabs.setTabEnabled(self.filesTab, 1)
-        else:
-            self.tabs.setTabEnabled(self.filesTab, 0)
+            if len(self.data) > 1:
+                self.tabs.setTabEnabled(self.filesTab, 1)
+            else:
+                self.tabs.setTabEnabled(self.filesTab, 0)
+            self.setMetaCombos() # set the two combo widgets according to the data
             
         self.send('Classified Examples', None)
         self.send('Examples', None)
-        self.setMetaCombos() # set the two combo widgets according to the data
         self.constructHeatmap()
+        self.canvas.update()
 
+    def chipdata(self, data):
+        print 'CHIPDATA'
+        self.data = [] # XXX should only remove the data from the same source, use id in this rutine
+        if not data:
+            for i in self.canvas.allItems():
+                i.setCanvas(None)
+            self.canvas.update()
+            return
+        indx = 0
+        for (strainname, ds) in data:
+            for d in ds:
+                self.dataset(d, indx)
+                indx += 1
+        self.createHeatMap()
+        self.canvas.update()
+        
     def constructHeatmap(self):
         self.heatmapconstructor = []
-        if self.SortGenes:
-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0]))
-        else:
-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0], None))
-        for i in range(len(self.data))[1:]:
-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[i], self.heatmapconstructor[0]))
+        if len(self.data):
+            if self.SortGenes:
+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0]))
+            else:
+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0], None))
+            for i in range(len(self.data))[1:]:
+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[i], self.heatmapconstructor[0]))
         self.createHeatMap()
 
     # remove unused values from the class of the data set
         cl = clo = data.domain.classVar
         if cl:
             if selection:
-##                print 'selection'
                 cl = orange.RemoveUnusedValues(cl, selection, removeOneValued = 1)
-##                if cl == clo:
-##                    print 'same'
             else:
                 cl = orange.RemoveUnusedValues(cl, data, removeOneValued = 1)
 
         for (g,s,e) in rows:
             hm = self.heatmaps[0][g]
             ex += hm.examples[hm.exampleIndices[s] : hm.exampleIndices[e+1]]
-            print 'sss', s, e
 
         # Reduce the number of class values, if class is defined
         newdomain = self.checkDomain(self.data[0], selection=ex)
         if not newdomain:
-##            print 'retain'
             newdomain = self.data[0].domain
         selectedData = orange.ExampleTable(newdomain, ex)
         if selectedData.domain.classVar:
         pass
 
     def fileOrderChange(self, chg):
-##        print 'ccc', chg
         if chg==-1 and self.selectedFile>0:
             switchFiles(self.selectedFile, self.selectedFile-1)
         if chg==1  and self.selectedFile < len(self.data - 1):
 
         # annotate
         hm = self.heatmaps[0][group]
-        for (row, indices) in enumerate(hm.exampleIndices[:-1]):
-            t = QCanvasText(str(hm.examples[hm.exampleIndices[row]][self.BAnnotationVar]), self.canvas)
-            t.setFont(font)
-            t.setX(x); t.setY(y)
-            t.show()
-            y += self.CellHeight
+        if self.BAnnotationVar:
+            for (row, indices) in enumerate(hm.exampleIndices[:-1]):
+                t = QCanvasText(str(hm.examples[hm.exampleIndices[row]][self.BAnnotationVar]), self.canvas)
+                t.setFont(font)
+                t.setX(x); t.setY(y)
+                t.show()
+                y += self.CellHeight
 
     def drawHeatMap(self):
-        print "DRAW HEATMAP"
+        # remove everything from current canvas
+        for i in self.canvas.allItems():
+            i.setCanvas(None)
+        if not len(self.data):
+            return
+
         lo = self.CutEnabled and self.CutLow   or self.lowerBound
         hi = self.CutEnabled and self.CutHigh  or self.upperBound
 
-        # remove everything from current canvas
-        for i in self.canvas.allItems():
-            i.setCanvas(None)
         self.canvasView.heatmapParameters(self, self.CellWidth, self.CellHeight) # needed for event handling
 
         palette = self.ColorPalettes[self.CurrentPalette]
             bmpl = []
             for g in range(groups):
                 bmp, self.imageWidth, imageHeight = hm[g].getBitmap(int(self.CellWidth), int(self.CellHeight), lo, hi, self.Gamma)
-                print "HAVE BITMAP %i %i" % (self.imageWidth, imageHeight),
                 bmpl.append(bmp)
                 if not i: self.heights.append(imageHeight)
-                print "OK"
             self.bmps.append(bmpl)
 
         self.canvas.resize(2000, 2000) # this needs adjustment
         x = c_offsetX; y0 = c_offsetY
 
         self.legend = self.heatmapconstructor[0].getLegend(self.imageWidth, c_legendHeight, self.Gamma)
-        print "LEGEND OK"
         if self.LegendOnTop:
             y0 = self.drawLegend(x, y0, self.imageWidth, c_legendHeight, palette)
 
                     y = self.drawGroupLabel(self.data[i][0].domain.classVar.values[g], x, y, self.imageWidth)
                 if not i: self.imgStart.append(y)
                 ycoord.append(y)
-                print "DFN %i" % self.heights[g]
                 image = ImageItem(self.bmps[i][g], self.canvas, self.imageWidth, self.heights[g], palette, x=x, y=y, z=z_heatmap)
-                print "IMAGE %i" % g
                 image.hm = self.heatmaps[i][g] # needed for event handling
                 image.height = self.heights[g]; image.width = self.imageWidth
                 if not i: self.imgEnd.append(y+self.heights[g]-1)
         self.canvas.update()
         
     def createHeatMap(self):
-        merge = min(self.Merge, float(len(self.data[0])))
-        squeeze = 1. / merge
-        self.lowerBound = 1000; self.upperBound = -1000 # CHANGE!!!
-        self.heatmaps = []
-        for (i, hmc) in enumerate(self.heatmapconstructor):
-            hm, lb, ub = hmc(squeeze)
-            self.heatmaps.append(hm)
-            self.lowerBound = min(self.lowerBound, lb)
-            self.upperBound = max(self.upperBound, ub)
+        if len(self.data):
+            merge = min(self.Merge, float(len(self.data[0])))
+            squeeze = 1. / merge
+            self.lowerBound = 1000; self.upperBound = -1000 # CHANGE!!!
+            self.heatmaps = []
+            for (i, hmc) in enumerate(self.heatmapconstructor):
+                hm, lb, ub = hmc(squeeze)
+                self.heatmaps.append(hm)
+                self.lowerBound = min(self.lowerBound, lb)
+                self.upperBound = max(self.upperBound, ub)
 
-        self.sliderCutLow.setRange(self.lowerBound, 0, 0.1)
-        self.sliderCutHigh.setRange(1e-10, self.upperBound, 0.1)
-        self.CutLow = max(self.CutLow, self.lowerBound)
-        self.CutHigh = min(self.CutHigh, self.upperBound)
-        self.sliderCutLow.setValue(self.CutLow)
-        self.sliderCutHigh.setValue(self.CutHigh)
-        self.selection.remove()
+            self.sliderCutLow.setRange(self.lowerBound, 0, 0.1)
+            self.sliderCutHigh.setRange(1e-10, self.upperBound, 0.1)
+            self.CutLow = max(self.CutLow, self.lowerBound)
+            self.CutHigh = min(self.CutHigh, self.upperBound)
+            self.sliderCutLow.setValue(self.CutLow)
+            self.sliderCutHigh.setValue(self.CutHigh)
+            self.selection.remove()
         self.drawHeatMap()
 
 ##################################################################################################
             col, row = int(x / self.dx), int(y / self.dy)
             # hm.getCellIntensity(row, col), hm.getRowIntensity(row)
             ex = hm.examples[hm.exampleIndices[row] : hm.exampleIndices[row+1]]
-            #print 'xxx', hm.exampleIndices[row], hm.exampleIndices[row+1]
             self.selector.setX(item.x()+col*self.dx-v_sel_width+1)
             self.selector.setY(item.y()+row*self.dy-v_sel_width+1)
             self.selector.show()
             self.bubble.head.setText(head)
             # bubble, construct body
             body = None
-            if self.master.BShowSpotIndex or self.master.BShowAnnotation or self.master.BShowGeneExpression:
+            if (self.master.BShowSpotIndex and self.master.BSpotVar) or self.master.BShowAnnotation or self.master.BShowGeneExpression:
                 for (i, e) in enumerate(ex):
                     if i>5:
                         body += "\n... (%d more)" % (len(ex)-5)
                         break
                     else:
                         s = []
-                        if self.master.BShowSpotIndex:
+                        if self.master.BShowSpotIndex and self.master.BSpotVar:
                             s.append(str(e[self.master.BSpotVar]))
                         if self.master.BShowGeneExpression:
                             s.append(str(e[col]))
-                        if self.master.BShowAnnotation:
+                        if self.master.BShowAnnotation and self.master.BAnnotationVar:
                             s.append(str(e[self.master.BAnnotationVar]))
                     if body: body += "\n"
                     else: body=""
     a.setMainWidget(ow)
 
     ow.show()
-##    data = orange.ExampleTable('wt-large')
 ##    d = orange.ExampleTable('wt'); d.name = 'wt'
-    d = orange.ExampleTable('wtclassed'); d.name = 'wtclassed'
-    ow.dataset(d, 0)
+    d = orange.ExampleTable('wt-nometa'); d.name = 'wt'
+    ow.dataset(d, 2)
+    d = orange.ExampleTable('wt-nometa'); d.name = 'wt'
+    ow.dataset(d, 1)
+    ow.dataset(None, 1)
+    ow.dataset(None, 2)
 ##    d = orange.ExampleTable('wt2'); d.name = 'wt2'
 ##    ow.dataset(d, 1)
 ##    d = orange.ExampleTable('wt3'); d.name = 'wt3'

OWProcessChipData.py

+"""
+<name>Process Chip Data</name>
+<description>Pre- and post-processing of chip data.</description>
+<category>Genomics</category>
+<icon>icons/ProcessChipData.png</icon>
+<priority>1110</priority>
+"""
+
+from OWWidget import *
+import OWGUI
+from OWChipDataFiles import ChipData
+import chipstat
+
+class OWProcessChipData(OWWidget):
+    settingsList  = ["preStdMethod", "postStdMethod", "preStdRob", "postStdRob", "mergeType", "commitOnChange"]
+
+    def __init__(self, parent=None, name='Chip Data Files'):
+        OWWidget.__init__(self, parent, name, "Process Chip Data pre- and post-processes chip data.")
+        self.callbackDeposit = []
+
+        self.inputs = [("Structured Chip Data", ChipData, self.chipdata)]
+        self.outputs = [("Structured Chip Data", ChipData)]
+
+        self.chipdata = None; self.datasets = None
+        self.std = [("No preprocessing", None),
+                    ("Array-based standardization", chipstat.standardize_arrays),
+                    ("Gene-based standardization", chipstat.standardize_genes),
+                    ("First array-, then gene-based standardization", lambda e,r: chipstat.standardize_genes(chipstat.standardize_arrays(e,r),r)),
+                    ("First gene-, then array-based standardization", lambda e,r: chipstat.standardize_arrays(chipstat.standardize_genes(e,r),r))]
+        # Settings
+        self.data = None
+        self.preStdMethod = 0; self.preStdRob = 1
+        self.postStdMethod = 0; self.postStdRob = 1
+        
+        self.mergeType = 0
+        self.commitOnChange = 0
+        self.loadSettings()
+
+        # GUI
+        # info
+        box = QVGroupBox("Info", self.controlArea)
+        self.infoa = QLabel('No data on input.', box)
+        self.infob = QLabel('', box)
+        
+        # preprocessing
+        OWGUI.separator(self.controlArea)
+        box = QVGroupBox("Preprocessing", self.controlArea)
+        labels = [x[0] for x in self.std]
+        OWGUI.comboBox(box, self, 'preStdMethod', label=None, labelWidth=None, orientation='vertical', items=labels, callback=self.selectionChange)
+        self.preRobBtn = OWGUI.checkBox(box, self, "preStdRob", "Robust standardization", callback=self.selectionChange)
+        
+        # merge
+        OWGUI.separator(self.controlArea)
+        self.mergeTypes = [('mean', 'Mean'), ('median', 'Median'), ('min', 'Minimum expression'), ('max', 'Maximum expression')]
+        labels = [x[1] for x in self.mergeTypes]
+        OWGUI.radioButtonsInBox(self.controlArea, self, 'mergeType', labels, box='Merge Replicas', tooltips=None, callback=self.selectionChange)
+
+        # postprocessing
+        OWGUI.separator(self.controlArea)
+        box = QVGroupBox("Postprocessing", self.controlArea)
+        labels = [x[0] for x in self.std]
+        OWGUI.comboBox(box, self, 'postStdMethod', label=None, labelWidth=None, orientation='vertical', items=labels, callback=self.selectionChange)
+        self.postRobBtn = OWGUI.checkBox(box, self, "postStdRob", "Robust standardization", callback=self.selectionChange)
+
+        # output
+        OWGUI.separator(self.controlArea)
+        box = QVGroupBox("Output", self.controlArea)
+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')
+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.selectionChange, disabled=1)
+
+        self.setBtnsState()
+        self.resize(100,100)
+
+    def selectionChange(self):
+        self.setBtnsState()
+        if self.commitOnChange:
+            self.sendData()
+
+    def chipdata(self, data):
+        self.commitBtn.setEnabled(data <> None)
+        if data:
+            self.data = data
+            nfiles = 0
+            for (n, d) in data:
+                nfiles += len(d)
+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))
+            print data
+            d = data[0][1][0]
+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))
+
+            self.sendData()
+        else:
+            self.send("Structured Chip Data", None)
+
+    # process arrays in the structure, returns new structure
+    def processArrays(self, datastructure, method, *arg):
+        pbStep = 30. / sum([len(data) for (dummy, data) in datastructure])
+        newdata = []
+        for (strainname, data) in datastructure:
+            if data:
+                new = []
+                for e in data:
+                    new.append(apply(method, (e,)+arg))
+                    self.progressBarAdvance(pbStep)
+                newdata.append([strainname, new])
+        return newdata
+
+    def sendData(self):
+        if not self.data:
+            return
+        self.send('Structured Chip Data', None) # this is required for other widgets not to mess up with two different datasets
+        self.progressBarInit()
+        data = self.data
+        # preprocessing
+        if self.preStdMethod: # 0 means no preprocessing
+            data = self.processArrays(data, self.std[self.preStdMethod][1], self.preStdRob)
+        self.progressBarSet(30)
+        # merging
+        merged = chipstat.merge_replicas(data, self.mergeTypes[self.mergeType][0])
+        self.progressBarSet(70)
+        # postprocessing
+        if self.postStdMethod: # 0 means no preprocessing
+            merged = self.processArrays(merged, self.std[self.postStdMethod][1], self.preStdRob)
+        for (i,d) in enumerate(merged):
+            d[1][0].name = self.data[i][0]
+        self.progressBarFinished()
+        self.send('Structured Chip Data', merged)
+            
+    def setBtnsState(self):
+        self.preRobBtn.setEnabled(self.preStdMethod > 0)
+        self.postRobBtn.setEnabled(self.postStdMethod > 0)
+
+if __name__=="__main__":
+    a=QApplication(sys.argv)
+    ow=OWProcessChipData()
+    a.setMainWidget(ow)
+
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()
+"""
+<name>Select Genes</name>
+<description>Select genes from chip based on input selectors.</description>
+<category>Genomics</category>
+<icon>icons/SelectGenes.png</icon>
+<priority>1150</priority>
+"""
+
+from OWWidget import *
+import OWGUI
+from OWChipDataFiles import ChipData
+from OWChipANOVA import GeneSelection
+import chipstat
+
+class OWSelectGenes(OWWidget):
+    settingsList  = ['negate', 'commitOnChange']
+
+    def __init__(self, parent=None, name='Select Genes'):
+        OWWidget.__init__(self, parent, name, "Select genes from chip based on input selectors")
+        self.callbackDeposit = []
+
+        self.inputs = [("Gene Selection", GeneSelection, self.loadselection, 0), ("Structured Chip Data", ChipData, self.chipdata, 1)]
+        self.outputs = [("Selected Data", ChipData), ("Other Data", ChipData), ("Gene Selection", GeneSelection)]
+
+        self.negate = 1
+        self.selectors = {}
+        self.data = None
+        self.commitOnChange = 1
+        # Settings
+        self.loadSettings()
+
+        # GUI
+        # info
+        box = QVGroupBox("Info", self.controlArea)
+        self.infoa = QLabel('No data on input.', box)
+        self.infob = QLabel('', box)
+        OWGUI.separator(self.controlArea)
+
+        # gene selection
+        box = QVGroupBox("Gene Selection", self.controlArea)
+        OWGUI.checkBox(box, self, 'negate', 'Negate', callback = self.selectionChange)
+
+        # output
+        box = QVGroupBox("Output", self.controlArea)
+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')
+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)
+
+        self.resize(200,100)
+        
+    def chipdata(self, data):
+        if data:
+            self.data = data
+##            nfiles = 0
+##            for (n, d) in data:
+##                nfiles += len(d)
+##            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))
+##            d = data[0][1][0]
+##            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))
+            self.senddata()
+        else:
+            self.send("Selected Data", None)
+            self.send("Other Data", None)
+            self.send("Gene Selection", None)
+        self.commitBtn.setEnabled(data <> None)
+
+    def loadselection(self, selector, id):
+        if selector:
+            self.selectors[id] = selector
+        else:
+            del self.selectors[id]
+        print 'SELN',
+        for s in self.selectors.values():
+            print len(s),
+        print
+##        print self.selectors
+        self.infoa.setText('%d selectors on input' % len(self.selectors))
+        self.senddata()
+        self.commitBtn.setEnabled(self.selectors <> None and len(self.selectors))
+
+    def senddata(self):
+        if len(self.selectors):
+            self.progressBarInit()
+            s = self.selectors.values()
+##            match = map(lambda *args: min(args), self.selectors.values())
+            # this needs to be changed!!!
+            match = []
+            n = len(s)
+            for i in range(len(s[0])):
+                mm = []
+                for j in range(n):
+                    mm.append(s[j][i])
+                match.append(min(mm))
+                
+            if self.negate:
+                match = [(1,0)[x] for x in match]
+            self.match = match
+            self.progressBarSet(20)
+
+            self.send("Gene Selection", self.match)
+            if self.data:
+                n = self.match.count(1)
+                if n==len(self.data[0][1][0]): # all genes match
+                    self.progressBarFinished()
+                    self.send("Selected Data", self.data)
+                    self.send("Other Data", None)
+                elif n==0:
+                    self.progressBarFinished()
+                    self.send("Selected Data", None)
+                    self.send("Other Data", self.data)
+                else:
+                    P = []; N = []
+                    pbStep = 80. / len(self.data)
+                    for (strainname, data) in self.data:
+                        dataP = [d.select(self.match) for d in data]
+                        dataN = [d.select(self.match, negate=1) for d in data]
+                        for i in range(len(data)):
+                            dataP[i].name = dataN[i].name = data[i].name
+                        P.append((strainname, dataP))
+                        N.append((strainname, dataN))
+                        self.progressBarAdvance(pbStep)
+                    self.send("Selected Data", P)
+                    self.send("Other Data", N)
+            self.progressBarFinished()
+
+    def selectionChange(self):
+        pass
+
+if __name__=="__main__":
+    a=QApplication(sys.argv)
+    ow=OWSelectGenes()
+    a.setMainWidget(ow)
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()
+import math
+import Numeric, MA
+import LinearAlgebra
+import stats
+import NumExtn
+import os, os.path
+import orange
+
+#########################################################################
+## standardization of profiles / arrays
+#########################################################################
+
+def standardize_arrays(et, robust=True):
+    """Returns a new example table where values are standardized to mean zero and unit variance columnwise.
+    Option: parametric (mean/stddev) or non-parametric (median/mad).
+    """
+    ma = orng2ma(et)
+    if robust:
+        return ma2orng_keepClassMetas(scaleMad(centerMed(ma)), et)
+    else:
+        return ma2orng_keepClassMetas(scaleStd(centerMean(ma)), et)
+    
+
+def standardize_genes(et, robust=True):
+    """Returns a new example table where values are standardized to mean zero and unit variance rowwise.
+    Option: parametric (mean/stddev) or non-parametric (median/mad).
+    """
+    ma = orng2ma(et)
+    if robust:
+        return ma2orng_keepClassMetas(scaleMad(centerMed(ma,1),1), et)
+    else:
+        return ma2orng_keepClassMetas(scaleStd(centerMean(ma,1),1), et)
+
+
+#########################################################################
+## merge replicas by mean, median, min, max
+#########################################################################
+
+def merge_replicas(aETStruct, type):
+    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]);
+    type = ["mean" | "median" | "min" | "max"]
+    """
+    shape = [0,0,0]
+    et0 = aETStruct[0][1][0]                                            # the first example table
+    shape[0] = len(et0)                                                 # number of examples (genes)
+    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
+    mergedETStruct = []
+    if type == "mean":
+        merge_func = MA.average
+    elif type == "median":
+        merge_func = NumExtn.medianMA
+    elif type == "min":
+        merge_func = NumExtn.minMA
+    elif type == "max":
+        merge_func = NumExtn.maxMA
+    else:
+        raise AttributeError, "type = ['mean' | 'median' | 'min' | 'max']"
+    for st, etList in aETStruct:
+        shape[2] = len(etList)
+        ma3d = MA.zeros(shape, MA.Float)
+        for idx, et in enumerate(etList):
+            ma3d[:,:,idx] = orng2ma(et)
+        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
+    return mergedETStruct
+        
+
+
+###########################################################################
+#### average expression profile
+###########################################################################
+##
+##def average_replicas(aETStruct):
+##    """Returns a list of tuples (strain, avrg_orngET) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).
+##    """
+##    shape = [0,0,0]
+##    et0 = aETStruct[0][1][0]                                            # the first example table
+##    shape[0] = len(et0)                                                 # number of examples (genes)
+##    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
+##    avrgETStruct = []
+##    for st, etList in aETStruct:
+##        shape[2] = len(etList)
+##        ma3d = MA.zeros(shape, MA.Float)
+##        for idx, et in enumerate(etList):
+##            ma3d[:,:,idx] = orng2ma(et)
+##        avrgETStruct.append((st, ma2orng(MA.average(ma3d, 2), et.domain)))
+##    return avrgETStruct
+
+
+#########################################################################
+## ANOVA on genes
+#########################################################################
+
+class AnovaOnGeneResults:
+    """A structure to store the results of ANOVA on genes where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).
+    """
+    def __init__(self, aETStruct, anovaList):
+        self.classNames = map(lambda x: x[0], aETStruct)
+        self.anovaList = anovaList
+
+
+def anova_on_genes(aETStruct, repMeasuresOnTime=False, callback = None):
+    """Conducts ANOVA on genes and returns a tuple (list of [pTime, pStrain, pInter], list of ANOVA objects needed to conduct posthoc tests)
+    where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).
+    Note: time points that cause empty cells are removed prior to conducting ANOVA.
+    """
+    ma3d = etStruct2ma3d(aETStruct)
+    rgLens = Numeric.array(map(lambda x: len(x[1]), aETStruct))
+    # arrays to store p-vals
+    FprobTime = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)
+    FprobStrain = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)
+    FprobTimeStrain = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)
+    # store ANOVA objects
+    anovaList = []
+    # decide between non-repeated / repeated measures ANOVA for factor time
+    if repMeasuresOnTime:
+        fAnova = AnovaRM12LR
+    else:
+        fAnova = Anova2wayLR
+    # check for empty cells for all genes at once and remove them
+    tInd2rem = []
+    ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(rgLens)))
+    for tIdx in range(ma3d.shape[1]):
+        for rIdx in range(rgLens.shape[0]):
+            if Numeric.add.reduce(MA.count(ma3d[:,tIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]],1)) == 0:
+                tInd2rem.append(tIdx)
+                break
+    if len(tInd2rem) > 0:
+        print "Warning: removing time indices %s for all genes" % (str(tInd2rem))
+        tInd2keep = range(ma3d.shape[1])
+        for tIdx in tInd2rem:
+            tInd2keep.remove(tIdx)
+        ma3d = MA.take(ma3d, tInd2keep, 1)
+    # for each gene...
+    for gIdx in range(ma3d.shape[0]):
+        # check for empty cells for that gene
+        tInd2rem = []
+        for tIdx in range(ma3d.shape[1]):
+            for rIdx in range(rgLens.shape[0]):
+                if MA.count(ma3d[gIdx,tIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]]) == 0:
+                    tInd2rem.append(tIdx)
+                    break
+        ma2d = ma3d[gIdx]
+        if len(tInd2rem) > 0:
+            print "Warning: removing time indices %s for gene %i" % (str(tInd2rem), gIdx)
+            tInd2keep = range(ma3d.shape[1])
+            for tIdx in tInd2rem:
+                tInd2keep.remove(tIdx)
+            ma2d = MA.take(ma2d, tInd2keep, 0)
+        # conduct anova ANOVA
+        an = fAnova(ma2d, rgLens, 1)
+        if callback:
+            callback()
+        FprobTime[gIdx] = an.FAprob
+        FprobStrain[gIdx] = an.FBprob
+        FprobTimeStrain[gIdx] = an.FABprob
+        anovaList.append(an)
+    return Numeric.transpose(Numeric.array([FprobTime, FprobStrain, FprobTimeStrain])).tolist(), AnovaOnGeneResults(aETStruct, anovaList)
+
+
+#########################################################################
+## POSTHOC ANOVA on genes
+#########################################################################
+
+def posthoc_anova_on_genes(className1, className2, aAnovaOnGeneResults, callback=None):
+    """Conduct posthoc anova test for the given class names and returns a list of of [pTime, pStrain, pInter].
+    """
+    cIdx1 = aAnovaOnGeneResults.classNames.index(className1)
+    cIdx2 = aAnovaOnGeneResults.classNames.index(className2)
+    # arrays to store p-vals
+    numGenes = len(aAnovaOnGeneResults.anovaList)
+    FpTime = -1*Numeric.ones((numGenes,), Numeric.Float)
+    FpStrain = -1*Numeric.ones((numGenes,), Numeric.Float)
+    FpTimeStrain = -1*Numeric.ones((numGenes,), Numeric.Float)
+    for idx, an in enumerate(aAnovaOnGeneResults.anovaList):
+        FpTime[idx], FpStrain[idx], FpTimeStrain[idx] = an.single_posthoc_anova_B_AB(cIdx1, cIdx2)
+        if callback: callback()
+    return Numeric.transpose(Numeric.array([FpTime, FpStrain, FpTimeStrain])).tolist()
+        
+
+###################################################################################
+## HELPER FUNCTIONS
+###################################################################################
+
+def scaleStd(nm, axis=0):
+    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)
+    return NumExtn.swapaxesMA(nm / NumExtn.stdMA(nm,0), 0, axis)
+
+def scaleMad(nm, axis=0):
+    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)
+    return NumExtn.swapaxesMA(nm / NumExtn.madMA(nm,0), 0, axis)
+
+
+def centerMean(nm, axis=0):
+    """Returns new masked numarray with values centered by subtracting Median."""
+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)
+    return NumExtn.swapaxesMA(nm - MA.average(nm,0), 0, axis)
+
+
+def centerMed(nm, axis=0):
+    """Returns new masked numarray with values centered by subtracting Median."""
+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)
+    return NumExtn.swapaxesMA(nm - NumExtn.medianMA(nm,0), 0, axis)
+
+
+def getETStruct(path):
+    """Returns a list of tuples (strain, [orngET1, orngET2, ...])
+    """
+    etStruct = []
+    strains = os.listdir(path)
+    for st in strains:
+        pathSt = path + "\\" + st
+        replicas = os.listdir(pathSt)
+        stEts = []
+        for rep in replicas:
+            stEts.append(orange.ExampleTable(pathSt + "\\" + rep))
+        etStruct.append((st, stEts))
+    return etStruct
+
+
+def etStruct2ma3d(aETStruct):
+    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D masked array and returns it.
+    """
+    shape = [0,0,0]
+    et0 = aETStruct[0][1][0]                                            # the first example table
+    shape[0] = len(et0)                                                 # number of examples (genes)
+    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
+    shape[2] = Numeric.add.reduce(map(lambda x: len(x[1]), aETStruct))  # number of ETs (replicas over all strains)
+    ma3d = MA.zeros(shape, MA.Float)
+    k = 0
+    for st, etList in aETStruct:
+        for et in etList:
+            ma3d[:,:,k] = orng2ma(et)
+            k += 1
+    return ma3d
+
+
+def orng2ma(aExampleTable):
+    """Converts orange.ExampleTable to MA.array based on the attribute values.
+    rows correspond to examples, columns correspond to attributes, class values are left out
+    missing values and attributes of types other than orange.FloatVariable are masked
+    """
+    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")
+    ma = MA.array(vals, MA.PyObject)
+    if aExampleTable.domain.classVar != None:
+        ma = ma[:,:-1]
+    mask = MA.where(MA.equal(ma, "?"), 1, 0)
+    for varIdx, var in enumerate(aExampleTable.domain.attributes):
+        if type(var) != orange.FloatVariable:
+            mask[:,varIdx] = Numeric.ones(len(aExampleTable))
+    return MA.array(MA.array(ma, MA.PyObject, mask=mask).filled(1e20), MA.Float, mask=mask)
+
+
+def ma2orng(arr2d, aDomain):
+    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
+    masked values converted to "?" (don't knows)
+    arr2d.shape[1] must be equal to the number of the attributes of the given domain
+    domain attributes mut be of type orange.FloatVariable
+    """
+    arr2d = MA.asarray(arr2d, MA.PyObject)
+    assert MA.rank(arr2d) == 2, "2d array expected"
+    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
+    if aDomain.classVar != None:
+        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
+        return orange.ExampleTable(aDomain, et)
+    else:
+        return orange.ExampleTable(aDomain, arr2d.tolist("?"))
+
+
+def ma2orng_keepClassMetas(arr2d, aExampleTable):
+    """Creates new example table where attribute values correspond to the given 2D array, class and meta attributes remain unchanged.
+    """
+    arr2d = MA.asarray(arr2d, MA.PyObject)
+    assert MA.rank(arr2d) == 2, "2D array expected"
+    assert arr2d.shape[0] == len(aExampleTable), "arr2d.shape[0] != len(aExampleTable)"
+    assert arr2d.shape[1] == len(aExampleTable.domain.attributes), "arr2d.shape[1] != len(aExampleTable.domain.attributes)"
+    domAtt = orange.Domain(aExampleTable.domain.attributes, None)
+    if aExampleTable.domain.classVar != None:
+        domClassMeta = orange.Domain([aExampleTable.domain.classVar])
+    else:
+        domClassMeta = orange.Domain([])
+    domClassMeta.addmetas(aExampleTable.domain.getmetas())
+    etAtt = orange.ExampleTable(domAtt, arr2d.tolist("?"))
+    etClassMeta = orange.ExampleTable(domClassMeta, aExampleTable)
+    return orange.ExampleTable([etAtt, etClassMeta])
+
+
+###################################################################################
+## PURE ANOVA BASED ON LINEAR REGRESSION
+###################################################################################
+
+class AnovaLRBase:
+    """Base class for all ANOVA types based on multivariate linear regression: manipulation with dummy variables.
+    """
+
+    def getDummyRC(self, numVars, numReplicas=1):
+        """Returns 2D array of reference cell encoded dummy variables.
+        """
+        if not type(numReplicas) == int:
+            numReplicas = Numeric.asarray(numReplicas)
+            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"
+        return Numeric.repeat(Numeric.concatenate((Numeric.zeros((1,numVars-1), Numeric.Float), Numeric.identity(numVars-1, Numeric.Float)), 0), numReplicas, 0)
+        
+
+    def getDummyEE(self, numVars, numReplicas=1):
+        """Returns 2D array of effects endoced dummy variables, shape (n, n-1) where n = numVars*numReplicas | sum(numReplicas).
+        Each row represents a code for a single variable value, encoded by numVar-1 dummy variables.
+        numReplicas = int | list | array of shape (numVars,): the number (or list) of observations per cell.
+        """
+        assert numVars > 0, "numVars <= 0"
+        if type(numReplicas) == int:
+            numReplicas = Numeric.ones((numVars,)) * numReplicas
+        else:
+            numReplicas = Numeric.asarray(numReplicas)
+            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"
+        if numVars == 1:
+            return Numeric.zeros((numReplicas[0], 0))
+        else:
+            return Numeric.repeat(Numeric.concatenate((Numeric.identity(numVars-1, Numeric.Int), -1*Numeric.ones((1,numVars-1), Numeric.Int)), 0), numReplicas, 0)
+        
+
+    def getSubjectDummyEE(self, numSubjList, numReplicas=1):
+        """Returns 2D array of effects endoced subject dummy variables, shape (sum(ni), sum(ni-1)) where numSubjList=[n0,n1,...].
+        numSubjList: a list of the number of subjects within each level of the non-repeated factor.
+        numReplicas: int | list | array of shape sum(ni): the number of times each subject code (row) is repeated.
+        """
+        numSubjList = Numeric.asarray(numSubjList)
+        assert len(numSubjList.shape) == 1, "len(numSubjList.shape) != 1"
+        if type(numReplicas) == int:
+            numReplicas = Numeric.ones((numSubjList.shape[0],)) * numReplicas
+        else:
+            numReplicas = Numeric.asarray(numReplicas)
+            assert len(numReplicas.shape) == 1, "len(numReplicas.shape) != 1"
+            assert numReplicas.shape[0] == numSubjList.shape[0], "numReplicas: int or a list of the same length as numSubjList expected"
+        si0 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList)))      # indices for axis 0 of dm
+        si1 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList-1)))    # indices for axis 1 of dm
+        dm = Numeric.zeros((si0[-1], si1[-1]))                                     # subject dummy codes matrix
+        numRepeats = Numeric.ones((si0[-1],))
+        for i in range(numSubjList.shape[0]):
+            if numSubjList[i] == 1: print "Warning: a single subject in group %i" % (i)
+            dm[si0[i]:si0[i+1], si1[i]:si1[i+1]] = self.getDummyEE(numSubjList[i],1)
+            numRepeats[si0[i]:si0[i+1]] = Numeric.ones((numSubjList[i],)) * numReplicas[i]
+        return Numeric.repeat(dm, numRepeats, 0)
+
+
+    def getDummiesJoin(self, dummyMtrx1, dummyMtrx2):
+        """Returns a tuple of resized dummy matrices containing the cartesian product of rows.
+        """
+        dummyMtrx1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[0], 0)                         # repeat each row separately
+        dummyMtrx2 = Numeric.resize(dummyMtrx2, (dummyMtrx1.shape[0], dummyMtrx2.shape[1]))     # repeat whole matrix, add new rows
+        return (dummyMtrx1, dummyMtrx2)
+
+
+    def getDummyInteraction(self, dummyMtrx1, dummyMtrx2):
+        """Returns a product of dummy matrices resized to the cartesian product of columns.
+        """
+        dmIntShp = (dummyMtrx1.shape[0], dummyMtrx1.shape[1] * dummyMtrx2.shape[1])
+        dmInt1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[1], 1)                             # repeat each column separately
+        dmInt2 = Numeric.transpose(Numeric.resize(Numeric.transpose(dummyMtrx2, (1,0)), (dmIntShp[1], dmIntShp[0])), (1,0)) # repeat whole matrix, add new columns
+        return dmInt1*dmInt2
+
+
+class Anova2wayLRBase(AnovaLRBase):
+    """Base class for all 2 way ANOVA types (repeated and non-repeated measures) includes various post-hoc tests.
+    The following variables should be defined by base classes:
+        self._addInteraction = addInteraction
+        self._arr2d = arr2d
+        self._groupLens = groupLens
+        self._MSpool = LR_treat.MSres
+        self._DFpool = LR_treat.MSreg
+    """
+
+    def posthoc_tstat_B(self):
+        """Fisher LSD: Compares all levels of factor B and returns (b,b) matrix of t-stat values and a matrix of corresponding p-values.
+        Use when there is no significant interaction.
+        Warning: Statistica computes it WRONG!
+        """
+        b = self._groupLens.shape[0]
+        # precompute averages
+        x_avrg = Numeric.zeros((b,), Numeric.Float)         # mean of cell means over factor A
+        sum_count_x = Numeric.zeros((b,), Numeric.Float)
+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
+        for idx in range(b):
+            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])
+            xi = MA.take(self._arr2d, groupInd, 1)
+            x_avrg[idx] = MA.average(MA.average(xi,1))                  # first average over replicas to obtain cell mean, then over factor A
+            sum_count_x[idx] = Numeric.add.reduce(1./MA.count(xi,1))    # first count the number of measurements within cells, then sum inverses over factor A
+            ##x_avrg[idx] = MA.average(MA.ravel(xi))                    # this is how Statistica computes it, but it is WRONG!
+            ##sum_count_x[idx] = 1./MA.count(xi)                        # this is how Statistica computes it, but it is WRONG!
+        # t-statistics
+        x_avrg_2 = MA.resize(x_avrg, (x_avrg.shape[0], x_avrg.shape[0]))
+        sum_count_x_2 = Numeric.resize(sum_count_x, (sum_count_x.shape[0],sum_count_x.shape[0]))
+        tstat = (MA.transpose(x_avrg_2) - x_avrg_2) * self._arr2d.shape[0] / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))
+        ##tstat = (MA.transpose(x_avrg_2) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))     # this is how Statistica computes it, but it is WRONG!
+        tprob = NumExtn.triangularPut(stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + NumExtn.triangularGet(tstat**2))),1,1)
+        return tstat, tprob
+
+
+    def isSignif_holm_B(self, alpha):
+        """Conduct Holm step-down sequential multiple comparison on factor B.
+        Returns (b,b) matrix indicating significant differences between levels of factor B and the direction of changes [-1|0|1].
+        """
+        tstat, pvals = self.posthoc_tstat_B()
+        pvals = NumExtn.triangularGet(pvals)
+        sortInd = Numeric.argsort(pvals)
+        k = pvals.shape[0]
+        isSignif = -1*Numeric.ones((k,), Numeric.Int)
+        for j in range(k):
+            isSignif[sortInd[j]] = pvals[sortInd[j]] < float(alpha) / (k-j)
+        return NumExtn.triangularPut(isSignif, upper=1, lower=1) * (MA.greater(tstat,0) - MA.less(tstat,0))
+
+
+    def posthoc_tstat_BbyA(self):
+        """Fisher LSD: Compares all levels of factor B by each level of factor A separately.
+        Returns: (b,b,a) matrix of t-stat values
+                 (b,b,a) matrix of corresponding p-values
+                 (b,b) matrix of geometric means of p-values over all levels of factor A.
+        Use when there is a significant interaction and factor A is of no interest.
+        """
+        a = self._arr2d.shape[0]
+        b = self._groupLens.shape[0]
+        # precompute averages
+        x_avrg = Numeric.zeros((a,b), Numeric.Float)
+        sum_count_x = Numeric.zeros((a,b), Numeric.Float)
+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
+        for idx in range(b):
+            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])
+            xi = MA.take(self._arr2d, groupInd, 1)
+            x_avrg[:,idx] = MA.average(xi, 1)
+            sum_count_x[:,idx] = 1. / MA.count(xi, 1)
+        # t-statistics
+        x_avrg_2 = MA.resize(x_avrg, (b,a,b))
+        sum_count_x_2 = Numeric.resize(sum_count_x, (b,a,b))
+        tstat = (MA.transpose(x_avrg_2, (2,1,0)) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2, (2,1,0)) + sum_count_x_2))
+        # get p-values for each level of factor a separately (axis 1)
+        tprob = MA.array(-1*Numeric.ones(tstat.shape, Numeric.Float), mask=Numeric.ones(tstat.shape))
+        for idx1 in range(tprob.shape[1]):
+            tprob[:,idx1,:] = NumExtn.triangularPut(stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + NumExtn.triangularGet(tstat[:,idx1,:]**2))),1,1)
+        return tstat, tprob
+
+
+    def posthoc_anova_B_AB(self):
+        """Conduct ANOVA for each pair of levels of factor B to detect interaction effect.
+        Returns for each pair of factor B levels:
+            (b,b) matrix of F-stat values for effect of factor B 
+            (b,b) matrix of p-values for effect of factor B
+            (b,b) matrix of F-stat values for interaction effect
+            (b,b) matrix of p-values for interaction effect
+        """
+        if self._addInteraction != 1:
+            raise "Error: posthoc_anova_B_AB can be conducted only when the interaction effect has been tested"
+        b = self._groupLens.shape[0]
+        FB = MA.masked * MA.ones((b,b), MA.Float)
+        FBprob = MA.masked * MA.ones((b,b), MA.Float)
+        FAB = MA.masked * MA.ones((b,b), MA.Float)
+        FABprob = MA.masked * MA.ones((b,b), MA.Float)
+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
+        groupInd = map(lambda i,j: range(i,j), groupLensAcc0[:-1],groupLensAcc0[1:])
+        for i in range(b):
+            for j in range(i+1,b):
+                takeInd = groupInd[i] + groupInd[j]
+                an = self.__class__(MA.take(self._arr2d, takeInd, 1), [self._groupLens[i],self._groupLens[j]], addInteraction=1)
+                FB[i,j] = FB[j,i] = an.FB
+                FBprob[i,j] = FBprob[j,i] = an.FBprob
+                FAB[i,j] = FAB[j,i] = an.FAB
+                FABprob[i,j] = FABprob[j,i] = an.FABprob
+        return FB, FBprob, FAB, FABprob
+
+
+    def single_posthoc_anova_B_AB(self, bLevel1, bLevel2):
+        """Conduct ANOVA for a specific pair of factor B levels and returns (FprobA, FprobB, FprobAB)
+        """
+        if self._addInteraction != 1:
+            raise "Error: single_posthoc_anova_B_AB can be conducted only when the interaction effect has been tested"
+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
+        groupInd = map(lambda i,j: range(i,j), groupLensAcc0[:-1],groupLensAcc0[1:])
+        takeInd = groupInd[bLevel1] + groupInd[bLevel2]
+        an = self.__class__(MA.take(self._arr2d, takeInd, 1), [self._groupLens[bLevel1],self._groupLens[bLevel2]], addInteraction=1)
+        return an.FAprob, an.FBprob, an.FABprob
+
+    
+class Anova2wayLR(Anova2wayLRBase):
+    """2 way ANOVA with multiple observations per cell for unbalanced designs using multivariate linear regression.
+    Multiple observations given at axis 1 together with the list of group lenghts, e.g. [3,2].
+    Supports balanced and unbalanced designs, i.e. different number of observations per cell and missing (masked) data.
+    TODO: account for empty cells (this implementation results in singular design matrix)
+    """
+
+    def __init__(self, arr2d, groupLens, addInteraction=0):
+        """arr2d: 2D masked array where replicas are given at axis 1 together with lenghts of the groups (multiple observations per cell).
+        addInteraction: include / exclude the interaction between factors
+        """
+        arr2d = MA.asarray(arr2d)
+        groupLens = Numeric.array(groupLens)    # make a copy for the case if subjects or levels of factor B are removed
+        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"
+
+        # check if there exist factor-levels with all values missing (for A and B)
+        # if so, reduce the corresponding DF and fix the number of dummy variables
+        missIndA = Numeric.compress(MA.count(arr2d, 1) == 0, Numeric.arange(arr2d.shape[0]))
+        if missIndA.shape[0] > 0:
+            print "Warning: removig factor A level(s) %s" % str(missIndA.tolist())
+            takeIndA = Numeric.compress(MA.count(arr2d, 1) != 0, Numeric.arange(arr2d.shape[0]))
+            arr2d = MA.take(arr2d, takeIndA, 0)
+        missIndSubj = Numeric.compress(MA.count(arr2d, 0) == 0, Numeric.arange(arr2d.shape[1]))
+        if missIndSubj.shape[0] > 0:
+            takeIndSubj = Numeric.compress(MA.count(arr2d, 0) != 0, Numeric.arange(arr2d.shape[1]))
+            arr2d = MA.take(arr2d, takeIndSubj, 1)
+            # fix groupLens
+            mapSubj2Group = -1*Numeric.ones(arr2d.shape[1])
+            groupLensAcc0 = [0] + Numeric.add.accumulate(groupLens).tolist()
+            for i in range(len(groupLensAcc0) - 1):
+                mapSubj2Group[groupLensAcc0[i]:groupLensAcc0[i+1]] = i
+            for subjIdx in missIndSubj:
+                groupLens[mapSubj2Group[subjIdx]] -= 1
+        # fix number of factor B levels
+        missIndB = Numeric.compress(groupLens <= 0, Numeric.arange(groupLens.shape[0]))
+        if missIndB.shape[0] > 0:
+            print "Warning: removig factor B level(s) %s" % str(missIndB.tolist())
+            takeIndB = Numeric.compress(groupLens > 0, Numeric.arange(groupLens.shape[0]))
+            groupLens = Numeric.take(groupLens, takeIndB)
+            # arr2d has already been taken care of by removing the subjects without observations
+
+        # check if data is balanced
+        isBalanced = (1. * Numeric.add.reduce((1.*groupLens/groupLens[0]) == Numeric.ones(groupLens.shape[0])) / groupLens.shape[0]) == 1
+
+        # check for empty cells, raise exception if empty cells exist and addInteraction=1
+        if addInteraction:
+            ax1Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
+            for idx in range(groupLens.shape[0]):
+                if Numeric.add.reduce(MA.count(arr2d[:,ax1Ind[idx]:ax1Ind[idx+1]],1) == 0) > 0:
+                    raise ValueError, "arr2d has empty cells, cannot evaluate the interaction between factors, try addInteraction=0"
+
+        # remove missing observations and the corresponding dummy variable codes
+        self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...]
+        noMissing = MA.count(self.y) == self.y.shape[0]
+        self.y, takeInd = NumExtn.compressIndices(self.y)
+
+        # dummy variables
+        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)
+        self.dummyB = self.getDummyEE(len(groupLens), groupLens)
+        self.dummyA, self.dummyB = self.getDummiesJoin(self.dummyA, self.dummyB)
+        if addInteraction:
+            self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB)
+            self.dummyAB = Numeric.take(self.dummyAB, takeInd, 0)
+        self.dummyA = Numeric.take(self.dummyA, takeInd, 0)
+        self.dummyB = Numeric.take(self.dummyB, takeInd, 0)
+
+        # run analysis
+        if addInteraction:
+            LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB, self.dummyAB), 1), self.y)                
+        else:
+            LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB),1), self.y)
+            
+        if isBalanced and noMissing:
+            LR_A = MultLinReg(self.dummyA, self.y)
+            LR_B = MultLinReg(self.dummyB, self.y)
+            self.FA = LR_A.MSreg / LR_treat.MSres
+            self.FB = LR_B.MSreg / LR_treat.MSres
+            if addInteraction:
+                LR_AB = MultLinReg(self.dummyAB, self.y)
+                self.FAB = LR_AB.MSreg / LR_treat.MSres
+        else:
+            if addInteraction:
+                LR_A_B = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB), 1), self.y)
+                LR_A_AB = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyAB), 1), self.y)
+                LR_B_AB = MultLinReg(Numeric.concatenate((self.dummyB, self.dummyAB), 1), self.y)
+                self.FA = (LR_treat.SSreg - LR_B_AB.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
+                self.FB = (LR_treat.SSreg - LR_A_AB.SSreg) / self.dummyB.shape[1] / LR_treat.MSres
+                self.FAB = (LR_treat.SSreg - LR_A_B.SSreg) / self.dummyAB.shape[1] / LR_treat.MSres
+            else:
+                LR_A = MultLinReg(self.dummyA, self.y)
+                LR_B = MultLinReg(self.dummyB, self.y)
+                self.FA = (LR_treat.SSreg - LR_B.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
+                self.FB = (LR_treat.SSreg - LR_A.SSreg) / self.dummyB.shape[1] / LR_treat.MSres
+
+        self.FAprob = stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA)
+        self.FBprob = stats.fprob(self.dummyB.shape[1], LR_treat.DFres, self.FB)
+        if addInteraction:
+            self.FABprob = stats.fprob(self.dummyAB.shape[1], LR_treat.DFres, self.FAB)
+
+        # store variables needed for posthoc tests
+        self._addInteraction = addInteraction
+        self._arr2d = arr2d
+        self._groupLens = groupLens
+        self._MSpool = LR_treat.MSres
+        self._DFpool = LR_treat.MSreg
+
+
+class AnovaRM12LR(Anova2wayLRBase):
+    """2 way ANOVA with REPEATED MEASURES on factor A using multivariate linear regression.
+    Axis 0 correspond to different levels of factor A, subjects given at axis 1, subjects nested inside factor B according to the given group lenghts.
+    Factor A is a within-subject effect, factor B is a between-ubject effect.
+    Example:
+        arr2d = [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]]: 2 levels of factor A, 2 levels of factor B, 5 subjects
+        groupLens = [2,3]
+    Supports balanced and unbalanced designs, i.e. different number of observations per cell and missing (masked) data.
+    TODO: fix the F and DF for factor B when there are missing values (see Glantz, pp. 462-5)
+    """
+
+    def __init__(self, arr2d, groupLens, addInteraction=0):
+        """arr2d: 2D masked array: [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]]
+        groupLens: lenghts of axis 1 indices that belong to the same level of factor B, e.g. [2,3]
+        addInteraction: include / exclude the interaction between factors A and B
+        """
+        arr2d = MA.asarray(arr2d)
+        groupLens = Numeric.array(groupLens)    # make a copy for the case if subjects or levels of factor B are removed
+        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"
+        assert arr2d.shape[1] == Numeric.add.reduce(groupLens), "arr2d.shape[1] != Numeric.add.reduce(groupLens)"
+
+        # check if there exist factor-levels with all values missing (for A, B and Subj)
+        # if so, reduce the corresponding DF and fix the number of dummy variables
+        missIndA = Numeric.compress(MA.count(arr2d, 1) == 0, Numeric.arange(arr2d.shape[0]))
+        if missIndA.shape[0] > 0:
+            print "Warning: removig factor A level(s) %s" % str(missIndA.tolist())
+            takeIndA = Numeric.compress(MA.count(arr2d, 1) != 0, Numeric.arange(arr2d.shape[0]))
+            arr2d = MA.take(arr2d, takeIndA, 0)
+        missIndSubj = Numeric.compress(MA.count(arr2d, 0) == 0, Numeric.arange(arr2d.shape[1]))
+        if missIndSubj.shape[0] > 0:
+            print "Warning: removig subject(s) %s" % str(missIndSubj.tolist())
+            takeIndSubj = Numeric.compress(MA.count(arr2d, 0) != 0, Numeric.arange(arr2d.shape[1]))
+            arr2d = MA.take(arr2d, takeIndSubj, 1)
+            # fix groupLens
+            mapSubj2Group = -1*Numeric.ones(arr2d.shape[1])
+            groupLensAcc0 = [0] + Numeric.add.accumulate(groupLens).tolist()
+            for i in range(len(groupLensAcc0) - 1):
+                mapSubj2Group[groupLensAcc0[i]:groupLensAcc0[i+1]] = i
+            for subjIdx in missIndSubj:
+                groupLens[mapSubj2Group[subjIdx]] -= 1
+        # fix number of factor B levels
+        missIndB = Numeric.compress(groupLens <= 0, Numeric.arange(groupLens.shape[0]))
+        if missIndB.shape[0] > 0:
+            print "Warning: removig factor B level(s) %s" % str(missIndB.tolist())
+            takeIndB = Numeric.compress(groupLens > 0, Numeric.arange(groupLens.shape[0]))
+            groupLens = Numeric.take(groupLens, takeIndB)
+            # arr2d has already been taken care of by removing the subjects without observations
+
+        # remove other missing observations and the corresponding dummy variable codes
+        self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...]
+        noMissing = MA.count(self.y) == self.y.shape[0]
+        self.y, takeInd = NumExtn.compressIndices(self.y)
+
+        # check if data is balanced
+        isBalanced = (1. * Numeric.add.reduce((1.*groupLens/groupLens[0]) == Numeric.ones(groupLens.shape[0])) / groupLens.shape[0]) == 1
+
+        # dummy variables
+        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)
+        self.dummyB = self.getDummyEE(groupLens.shape[0], groupLens)
+        self.dummySubjInB = self.getSubjectDummyEE(groupLens, 1)
+        dummyB_Subj = Numeric.concatenate((self.dummyB, self.dummySubjInB), 1)
+        self.dummyA, dummyB_Subj = self.getDummiesJoin(self.dummyA, dummyB_Subj)
+        self.dummyB = dummyB_Subj[:, 0:self.dummyB.shape[1]]
+        self.dummySubjInB = dummyB_Subj[:, self.dummyB.shape[1]:]
+        if addInteraction: