# Commits

committed 4d93222

no message

# NumExtn.py

`+import math`
`+import Numeric`
`+import MA`
`+import MLab, LinearAlgebra`
`+`
`+import numarray as NA`
`+import numarray.ma as NM`
`+import numarray.linear_algebra as LA`
`+`
`+`
`+#####################################################################################`
`+## Strictly upper (lower) triangular matrix <-> 1D array`
`+#####################################################################################`
`+`
`+def triangularPut(m1d, upper=1, lower=0):`
`+    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.`
`+    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.`
`+    The lower triangular part (if requested) equals to the transposed upper triangular part.`
`+    If upper == lower == 1 a symetric matrix is returned.`
`+    """`
`+    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"`
`+    m1d = MA.asarray(m1d)`
`+    assert MA.rank(m1d) == 1, "1D masked array expected"`
`+    m2dShape0 = math.ceil(math.sqrt(2*m1d.shape[0]))`
`+    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)/2, "the length of m1d does not correspond to n(n-1)/2"`
`+    if upper:`
`+        if lower:`
`+            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))`
`+        else:`
`+            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))`
`+    else:`
`+        if lower:`
`+            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))`
`+        else:`
`+            mask = Numeric.ones((m2dShape0, m2dShape0))`
`+`
`+    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.typecode()))`
`+    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))`
`+    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))`
`+    MA.put(m2d, putIndices, m1d)`
`+    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))`
`+    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))`
`+    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))`
`+`
`+`
`+def triangularGet(m2d, upper=1):`
`+    """Returns 1D masked array with elements from the upper (lower) triangular part of the given matrix.`
`+    For a symetric matrix triangularGet(m2d, 0) and triangularGet(m2d, 1) return elements in different order.`
`+    """`
`+    assert upper in [0,1], "upper: [0|1] expected"`
`+    m2d = MA.asarray(m2d)`
`+    assert MA.rank(m2d) == 2, "2D (masked) array expected"`
`+    if upper:`
`+        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i<j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))`
`+    else:`
`+        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i>j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))`
`+    return MA.take(MA.ravel(m2d), takeInd)`
`+`
`+`
`+#####################################################################################`
`+## Put 1D array on the diagonal of the given 2D array and returns a copy.`
`+#####################################################################################`
`+`
`+def diagonalPut(m1d, m2d):`
`+    """Puts the given 1D masked array into the diagonal of the given 2D masked array and returns a new copy of the 2D array.`
`+    """`
`+    m1d = MA.asarray(m1d)`
`+    m2d = MA.asarray(m2d)`
`+    assert MA.rank(m1d) == 1 and MA.rank(m2d) == 2, "1D and 2D masked array expected"`
`+    assert m1d.shape[0] == m2d.shape[0] == m2d.shape[1], "the shape of the given arrays does not match"`
`+    putIndices = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i==j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))`
`+    m2dShape = m2d.shape`
`+    m2d = MA.ravel(m2d)`
`+    MA.put(m2d, putIndices, m1d)`
`+    return MA.reshape(m2d, m2dShape)`
`+    `
`+`
`+#####################################################################################`
`+#### linear interpolation of missing values by the given axis`
`+#####################################################################################`
`+##`
`+##def fillLinear1(ma, axis=0):`
`+##    """BUG: does not handle successive missing values`
`+##    """`
`+##    maflat = MA.array(MA.ravel(ma))`
`+##    mod = ma.shape[axis]`
`+##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))`
`+##    for idx in maskInd:`
`+##        if (idx % mod) == 0:  # the first one`
`+##            maflat[idx] = maflat[idx+1]`
`+##        elif (idx % mod) == mod-1:`
`+##            maflat[idx] = maflat[idx-1]`
`+##        else:`
`+##            maflat[idx] = (maflat[idx-1] + maflat[idx+1]) / 2.`
`+##    return MA.reshape(maflat, ma.shape)`
`+##`
`+##`
`+##def fillLinear2(ma, axis=0):`
`+##    """BUG: does not handle successive missing values`
`+##    """`
`+##    maflat = MA.array(MA.ravel(ma))`
`+##    mod = ma.shape[axis]`
`+##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))`
`+##    for idx in maskInd:`
`+##        idxFirst = idx-(idx%mod)`
`+##        idxNext = idxFirst+mod`
`+##        if MA.count(maflat[idxFirst:idxNext]) == 0:                                 # all elements are masked`
`+##            print "Warning: cannot do linear interpolation, all values are masked"`
`+##        elif idx == idxFirst:                                                        # the first element is masked`
`+##            maflat[idx] = maflat[idx+1:idxNext].compressed()[0]`
`+##        elif idx == idxNext-1:                                                      # the last element is masked`
`+##            maflat[idx] = maflat[idxFirst:idx].compressed()[-1]`
`+##        else:`
`+##            maflat[idx] = MA.average([MA.take(maflat,range(idx-1,idxFirst-1,-1)).compressed(), maflat[idx+1:idxNext].compressed(), MA.masked])`
`+####            maflat[idx] = (maflat[idxFirst:idx].compressed()[-1] + maflat[idx+1:idxNext].compressed()[0]) / 2.`
`+##    return MA.reshape(maflat, ma.shape)`
`+##`
`+##def fillLinear(ma, axis=0):`
`+##    """Linear interpolation of missing values;`
`+##    """`
`+##    pass`
`+##`
`+##`
`+##si = scipy.interpolate.interp1d(Numeric.array([[0,1,3,4],[0,1,3,4]]), Numeric.array([[0.1,1.1,3.2,4.4],[0,1,3,4]]))`
`+##si(2)`
`+`
`+###################################################################################`
`+## swapaxes for masked array`
`+###################################################################################`
`+`
`+def swapaxesMA(ma, axis1, axis2):`
`+    if axis1 < axis2:`
`+        m,M = axis1, axis2`
`+    elif axis1 > axis2:`
`+        m,M = axis2, axis1`
`+    elif 0 <= axis1 == axis2 < len(ma.shape):`
`+        return ma`
`+    else:`
`+        raise ValueError("Bad axis1 or axis2 argument to swapaxes.")`
`+    axList = range(m) + [M] + range(m+1,M) + [m] + range(M+1,len(ma.shape))`
`+    return MA.transpose(ma, axList)`
`+`
`+`
`+###################################################################################`
`+## compress MA to Numeric and return Numeric + indices of non-masked places`
`+###################################################################################`
`+`
`+def compressIndices(ma):`
`+    """Returns 1D compressed Numeric array and the indices of the non-masked places.`
`+    usage:  nu,ind = compressIndices(ma)`
`+            nu = Numeric.f_elementwise(nu)`
`+            ma = MA.put(ma, ind, nu)`
`+    """`
`+    ma = MA.asarray(ma)`
`+    nonMaskedInd = Numeric.compress(1-Numeric.ravel(MA.getmaskarray(ma)), Numeric.arange(Numeric.multiply.reduce(ma.shape)))`
`+    return MA.filled(ma.compressed()), nonMaskedInd`
`+`
`+`
`+###################################################################################`
`+## compact print of Numeric or MA`
`+###################################################################################`
`+`
`+def aprint(a,decimals=2):`
`+    """prints array / masked array of floats"""`
`+    if type(a) == Numeric.ArrayType:`
`+        print Numeric.around(a.astype(Numeric.Float0),decimals)`
`+    elif type(a) == MA.MaskedArray:`
`+        print MA.around(a.astype(MA.Float0),decimals)`
`+`
`+`
`+###################################################################################`
`+## Numeric -> numarray`
`+## MA -> numarray.ma`
`+###################################################################################`
`+`
`+def _nu2na(a):`
`+    """Numeric -> numarray"""`
`+    a = Numeric.asarray(a)`
`+    return NA.array(a)`
`+`
`+def _ma2nm(m):`
`+    """MA -> numarray.ma"""`
`+    m = MA.asarray(m)`
`+    return NM.array(m.raw_data(), mask=m.raw_mask())`
`+`
`+`
`+###################################################################################`
`+## numarray -> Numeric`
`+## numarray.ma -> MA`
`+###################################################################################`
`+`
`+def _na2nu(a):`
`+    """numarray -> Numeric"""`
`+    a = NA.asarray(a)`
`+    return Numeric.array(a)`
`+`
`+def _nm2ma(m):`
`+    """numarray.ma -> MA"""`
`+    m = NM.asarray(m)`
`+    return MA.array(m.raw_data(), mask=m.raw_mask())`
`+`
`+`
`+###################################################################################`
`+## min / max along the given axis`
`+## SLOW DUE TO SORT`
`+###################################################################################`
`+`
`+def minMA(m,axis=0):`
`+    """slow: remove sorting"""`
`+    m = MA.asarray(m, MA.Float)`
`+    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))`
`+    m = MA.transpose(m, transList)    # do not use swapaxes`
`+    return MA.sort(m, 0, fill_value=1e20)[0]`
`+    `
`+def maxMA(m,axis=0):`
`+    """slow: remove sorting"""`
`+    m = MA.asarray(m, MA.Float)`
`+    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))`
`+    m = MA.transpose(m, transList)    # do not use swapaxes`
`+    return MA.sort(m, 0, fill_value=-1e20)[-1]`
`+`
`+###################################################################################`
`+## median`
`+###################################################################################`
`+`
`+def median(m,axis=0):`
`+    m = Numeric.asarray(m)`
`+    return MLab.median(Numeric.transpose(m, [axis]+range(axis)+range(axis+1,Numeric.rank(m))))  # do not use swapaxes`
`+`
`+`
`+def medianMA(m,axis=0):`
`+    """Returns median of the given masked array along the given axis."""`
`+    return _nm2ma(_medianNM(_ma2nm(m),axis))`
`+`
`+`
`+def _medianNA(m,axis=0):`
`+    m = NA.asarray(m)`
`+    return LA.mlab.median(NA.transpose(m, [axis]+range(axis)+range(axis+1,m.rank)))   # do not use swapaxes`
`+`
`+def _medianNM(m,axis=0):`
`+    """Returns median of the given masked numarray along the given axis."""`
`+    m = NM.asarray(m, NM.Float)`
`+    if len(m.shape) == 0:`
`+        return m[0]`
`+    elif len(m.shape) == 1:`
`+        c = NM.count(m)`
`+        mIndSort = NM.argsort(m,fill_value=1e20)`
`+        return (m[mIndSort[(c-1)/2]] + m[mIndSort[c/2]]) / 2.`
`+    else:`
`+        # count the number of nonmasked indices along the given axis`
`+        c = NM.count(m, axis=axis)`
`+        # prepare indices for other axis (except for the given)`
`+        cind = NA.indices(NA.shape(c))`
`+        # indices of m sorted by increasing value at axis 0; masked values placed at the end;`
`+        # use _argsortSwapNM() to get the shape same as m`
`+        mtIndSort = _argsortSwapNM(m,axis,fill_value=1e20)`
`+        # get indices to address median elements from m`
`+        # tuple(takeInd1): all lists in the tuple must be of the same shape,`
`+        #   the result mtIndSort[tuple(takeInd1)] is also of that shape`
`+        takeInd1 = cind.tolist()`
`+        takeInd1.insert(axis,((c-1)/2).tolist())`
`+        medInd1 = mtIndSort[tuple(takeInd1)]`
`+        takeInd2 = cind.tolist()`
`+        takeInd2.insert(axis,(c/2).tolist())`
`+        medInd2 = mtIndSort[tuple(takeInd2)]`
`+        # get both median elements; if c[i] odd: med1[i] == med2[i]`
`+        takeMed1 = cind.tolist()`
`+        takeMed1.insert(axis,medInd1.tolist())`
`+        med1 = m[tuple(takeMed1)]`
`+        takeMed2 = cind.tolist()`
`+        takeMed2.insert(axis,medInd2.tolist())`
`+        med2 = m[tuple(takeMed2)]`
`+##        if __name__=="__main__":`
`+##            print "m\n",m.filled(-1)`
`+##            print "c", c`
`+##            print "[(c-1)/2,c/2]", [(c-1)/2,c/2]`
`+##            print "cind\n",cind`
`+##            print "mtIndSort\n",mtIndSort`
`+##            print "medInd1\n",medInd1`
`+##            print "medInd2\n",medInd2`
`+##            print "med1\n",med1`
`+##            print "med2\n",med2`
`+        return (med1+med2)/2.`
`+`
`+def _argsortSwapNM(m,axis=0,fill_value=None):`
`+    """Returns the indices along the given axis sorted by increasing value of the given masked numarray`
`+    e.g. m=[[.5,.2],[.3,.6]], _argsortSwapNM(m,0)->[[1,0],[0,1]]"""`
`+    m = NM.asarray(m)`
`+    return NA.swapaxes(NA.asarray(NM.argsort(m, axis, fill_value)), -1, axis)`
`+`
`+`
`+###################################################################################`
`+## std. deviation MA`
`+###################################################################################`
`+`
`+def stdMA(m,axis=0):`
`+    m = MA.asarray(m)`
`+    m = MA.transpose(m, [axis] + range(0,axis) + range(axis+1,MA.rank(m)))  # do not use swapaxes, use (axis, 0...axis-1, axis+1...rank)`
`+    return MA.sqrt(MA.add.reduce((MA.subtract(m,MA.average(m,0)))**2,0)/(m.shape[0]-1))`
`+`
`+`
`+###################################################################################`
`+## median absolute deviation`
`+###################################################################################`
`+`
`+def mad(m,axis=0):`
`+    """Returns Median Absolute Deviation of the given array along the given axis.`
`+    """`
`+    m = Numeric.asarray(m)`
`+    mx = Numeric.asarray(median(m,axis),Numeric.Float)`
`+    xt = Numeric.transpose(m, [axis]+range(axis)+range(axis+1,Numeric.rank(m))) # do not use swapaxes`
`+    return MLab.median(Numeric.absolute(xt-mx))`
`+`
`+def madMA(m,axis=0):`
`+    """Returns Median Absolute Deviation of the given masked array along the given axis.`
`+    """`
`+    m = MA.asarray(m)`
`+    mx = MA.asarray(medianMA(m,axis),MA.Float)`
`+    xt = MA.transpose(m, [axis]+range(axis)+range(axis+1,MA.rank(m)))   # do not use swapaxes`
`+    return medianMA(MA.absolute(xt-mx))`
`+`
`+def _madNA(m,axis=0):`
`+    """Returns Median Absolute Deviation of the given numarray along the given axis.`
`+    """`
`+    m = NA.asarray(m)`
`+    mx = NA.asarray(_medianNA(m,axis),NA.Float)`
`+    xt = NA.transpose(m, [axis]+range(axis)+range(axis+1,m.rank))`
`+    return LA.mlab.median(NA.absolute(xt-mx))`
`+`
`+def _madNM(m,axis=0):`
`+    """Returns Median Absolute Deviation of the given masked numarray along the given axis.`
`+    """`
`+    m = NM.asarray(m, NM.Float)`
`+    mx = _medianNM(m, axis)`
`+    # BUG WARNING: transpose returns ObjectArray, use NM.array(..., NM.Float)`
`+    xt = NM.array(NM.transpose(m, [axis]+range(axis)+range(axis+1,NM.rank(m))), NM.Float)`
`+    return _medianNM(NM.absolute(xt-mx),0)`
`+`
`+`
`+###################################################################################`
`+## binomial factors`
`+###################################################################################`
`+`
`+def binomial(n,r):`
`+    """Returns array of elementwise binomial factors: n[i]! / r[i]!(n[i]-r[i])!`
`+    type(n) & type(r) == int | Numeric.ArrayType of int`
`+    compute in floats to avoid overflow; max. n = 1029`
`+    """`
`+##    n = Numeric.asarray(n).astype(Numeric.Int)`
`+##    r = Numeric.asarray(r).astype(Numeric.Int)`
`+    n = Numeric.asarray(n)`
`+    r = Numeric.asarray(r)`
`+    assert Numeric.shape(n) == Numeric.shape(r), "binomial(n,r): n and r of different shape"`
`+    bin = Numeric.zeros(Numeric.shape(n), Numeric.Float)`
`+    for idx in xrange(len(n.flat)):`
`+        ni1 = n.flat[idx] + 1`
`+        b = Numeric.ones((ni1), Numeric.Float)`
`+        for i in xrange(1,ni1):`
`+            for j in xrange(i-1,0,-1):`
`+                b[j] += b[j-1]`
`+        bin.flat[idx] = b[r.flat[idx]]`
`+    return bin`
`+`
`+def __binomial_single(n,r):`
`+    """`
`+    DEPRICATED, see binomial(n,r)`
`+    returns n! / r!(n-r)!`
`+    """`
`+    b = Numeric.ones((n+1), Numeric.Float)`
`+    for i in xrange(1,n+1):`
`+        for j in xrange(i-1,0,-1):`
`+            b[j] += b[j-1]`
`+    return b[r]`
`+`
`+`
`+`
`+`
`+`
`+`
`+if __name__ == "__main__":`
`+`
`+    m1 = NM.array([4,3,6,8,54,1], mask=[1,1,0,1,1,1])`
`+    m2a = NM.array([[1,2,3],[4,5,6],[7,8,9]], NM.Float, mask=[[0,1,0],[0,1,0],[0,0,1]])`
`+    n2 = NA.array([[3,1,2],[4,3,6],[1,4,7],[4,3,6]])`
`+    n3 = NA.array([n2,n2])`
`+    m2 = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3]], NM.Int, mask=[[0,1,0,0],[0,1,0,0],[0,0,0,1],[0,0,0,0],[0,0,0,1],[1,1,1,1]])`
`+    m2s = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3],[6,4,2,3],[6,4,2,3],[6,4,2,3],[6,4,2,3]], NM.Int, mask=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[1,1,1,1],[1,1,1,1],[1,1,1,1],[1,1,1,1]])`
`+    m2u = NM.array([[5,7,3,3],[6,4,5,1],[2,6,1,4],[3,4,5,1],[3,4,5,1],[6,4,2,3]], NM.Int, mask=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]])`
`+    m3 = NM.array([m2.raw_data(),10*m2.raw_data()],NM.Int, mask=[m2.raw_mask(),m2.raw_mask()], fill_value=-1)`
`+`
`+##    def test3D(m3):`
`+##        print "median\n",_medianNM(m3,0).filled(-1)`
`+##        print "......."`
`+##        for ii in range(m3.shape[0]):`
`+##            print m3[ii,:,:].filled(-1)`
`+##            print`
`+##        print "median\n",_medianNM(m3,1).filled(-1)`
`+##        print "......."`
`+##        for ii in range(m3.shape[1]):`
`+##            print m3[:,ii,:].filled(-1)`
`+##            print`
`+##        print "median\n",_medianNM(m3,2).filled(-1)`
`+##        print "......."`
`+##        for ii in range(m3.shape[2]):`
`+##            print m3[:,:,ii].filled(-1)`
`+##            print`
`+`
`+    # median    `
`+##    m1med = _medianNM(m1)`
`+##    m2med = _medianNM(m2,1)`
`+##    m3med = _medianNM(m3,2)`
`+`
`+    # MAD: must be equal!`
`+##    mad(m2u)`
`+##    mad(m2s)`
`+`
`+    # compare Numeric, MA, NA, NM`
`+##    import RandomArray`
`+##    lst1 = (RandomArray.random((7)) * 10).tolist()`
`+##    lst2 = (RandomArray.random((4,7)) * 10).tolist()`
`+##    lst3 = (RandomArray.random((3,4,7)) * 10).tolist()`
`+##`
`+##    def getAllArrays(lst1):`
`+##        # Numeric, MA`
`+##        nu1 = Numeric.array(lst1)`
`+##        ma1 = MA.array(lst1)`
`+##        ma1m = MA.masked_less(lst1, 5)`
`+##        # numarray, ma`
`+##        na1 = NA.array(lst1)`
`+##        nm1 = NM.array(lst1)`
`+##        nm1m = NM.masked_less(lst1, 5)`
`+##        return nu1, ma1, ma1m, na1, nm1, nm1m`
`+##`
`+##    nu1, ma1, ma1m, na1, nm1, nm1m = getAllArrays(lst1)`
`+##    nu2, ma2, ma2m, na2, nm2, nm2m = getAllArrays(lst2)`
`+##    nu3, ma3, ma3m, na3, nm3, nm3m = getAllArrays(lst3)`
`+`
`+    # linear interpolation of missing values (fillLinear)`
`+##    m1 = MA.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]], mask = [[1,0,0,0],[0,1,0,0],[0,0,0,1]])`
`+##    m2 = MA.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]], mask = [[1,1,0,0],[0,1,1,0],[0,0,1,1]])`
`+##    m1l = fillLinear(m1,1)`
`+##    m2l = fillLinear(m2,1)`
`+`
`+    # triangular matrix <-> 1D masked array`
`+##    m1 = MA.arange(0, 6)`
`+##    m2u = triangularPut(m1,1,0)`
`+##    m2l = triangularPut(m1,0,1)`
`+##    m2ul = triangularPut(m1,1,1)`
`+##    m2 = triangularPut(m1,0,0)`
`+##`
`+##    m2uld = diagonalPut(Numeric.arange(4), m2ul)`
`+##`
`+##    m1u = triangularGet(m2ul)`
`+##    m1l = triangularGet(m2ul, 0)`
`+    `

# OWChipANOVA.py

`+"""`
`+<name>ANOVA on Chip Data</name>`
`+<description>ANOVA on chip data (strains, replicas).</description>`
`+<category>Genomics</category>`
`+<icon>icons/ChipANOVA.png</icon>`
`+<priority>1120</priority>`
`+"""`
`+`
`+from OWWidget import *`
`+import OWGUI`
`+from OWChipDataFiles import ChipData`
`+import chipstat`
`+`
`+class ANOVAResults(orange.Orange):`
`+    pass`
`+`
`+class GeneSelection(orange.Orange):`
`+    pass`
`+`
`+class OWChipANOVA(OWWidget):`
`+    settingsList  = ['p1', 'p2', 'p3', 'filter1', 'filter2', 'filter3', 'commitOnChange']`
`+`
`+    def __init__(self, parent=None, name='ANOVA on Chip Data'):`
`+        OWWidget.__init__(self, parent, name, "ANOVA on chip data (strains, replicas).")`
`+        self.callbackDeposit = []`
`+`
`+        self.inputs = [("Structured Chip Data", ChipData, self.chipdata, 1)]`
`+        self.outputs = [("Selected Data", ChipData), ("Other Data", ChipData), ("Results of ANOVA", ANOVAResults), ("Gene Selection", GeneSelection)]`
`+`
`+        self.commitOnChange = 0`
`+        self.chipdata = None`
`+        self.p1, self.p2, self.p3 = (2, 2, 2)`
`+        self.filter1, self.filter2, self.filter3 = (1, 0, 0)`
`+        # Settings`
`+        self.loadSettings()`
`+        self.ps = None`
`+        self.selection = [None]*3`
`+`
`+        # GUI`
`+        # info`
`+        box = QVGroupBox("Info", self.controlArea)`
`+        self.infoa = QLabel('No data on input.', box)`
`+        self.infob = QLabel('', box)`
`+        OWGUI.separator(self.controlArea)`
`+`
`+        # gene selection`
`+        self.selectionBox = QVGroupBox("Gene Selection", self.controlArea)`
`+        self.factors = [('First factor (time)', 'p1', 'filter1'),`
`+                   ('Second factor (strain)', 'p2', 'filter2'),`
`+                   ('Interaction factor (time*strain)', 'p3', 'filter3')]`
`+        self.pvals = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5]`
`+`
`+        self.numgenes = []`
`+        for i in range(3):`
`+            OWGUI.checkBox(self.selectionBox, self, self.factors[i][2], self.factors[i][0], callback=self.finalselection)`
`+            hbox = QHBox(self.selectionBox)`
`+            lbl = QLabel('', hbox)`
`+            lbl.setFixedSize(20, lbl.sizeHint().height())`
`+            lbl = QLabel('p <', hbox)`
`+            lbl.setFixedSize(25, lbl.sizeHint().height())`
`+            OWGUI.comboBox(hbox, self, self.factors[i][1], items = self.pvals, callback=lambda x=i: self.geneselection(x))`
`+            self.numgenes.append(QLabel('  (0 genes)', hbox))`
`+        self.selectionLbl = QLabel('Total of 0 genes match criteria', self.selectionBox)`
`+        self.selectionBox.setDisabled(1)`
`+`
`+        # output`
`+        box = QVGroupBox("Output", self.controlArea)`
`+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')`
`+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)`
`+`
`+        self.resize(200,100)`
`+        `
`+    def chipdata(self, data):`
`+        if data:`
`+            self.data = data`
`+            self.selectionBox.setEnabled(1)`
`+            nfiles = 0`
`+            for (n, d) in data:`
`+                nfiles += len(d)`
`+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))`
`+            d = data[0][1][0]`
`+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))`
`+`
`+            self.analysis()`
`+        else:`
`+            self.send("Results of ANOVA", None)`
`+        self.commitBtn.setEnabled(data <> None)`
`+`
`+    def geneselection(self, indx):            `
`+        margin = self.pvals[getattr(self, self.factors[indx][1])]`
`+        p = [x[indx] for x in self.ps]`
`+        n = len(filter(lambda x: x<margin, p))`
`+        self.numgenes[indx].setText('  (%d %s)' % (n, ['genes', 'gene'][n==1]))`
`+        self.selection[indx] = map(lambda x: x<margin, p)`
`+        for x in self.selection:`
`+            if x==None: return`
`+        self.finalselection()`
`+`
`+    def finalselection(self):`
`+        if not self.ps:`
`+            return`
`+        self.match = [0]*len(self.ps)`
`+        for indx in range(3):`
`+            if getattr(self, self.factors[indx][2]) and self.selection[indx]:`
`+                self.match = map(lambda a,b: a or b, self.match, self.selection[indx])`
`+        n = self.match.count(1)`
`+        self.selectionLbl.setText('Total of %d %s match criteria' % (n, ['genes', 'gene'][n==1]))`
`+        if self.commitOnChange:`
`+            self.senddata()`
`+`
`+    def analysis(self):`
`+        self.progressBarInit()`
`+        pbStep = 90./len(self.data[0][1][0])`
`+        self.ps, anova_results = chipstat.anova_on_genes(self.data, callback=lambda: self.progressBarAdvance(pbStep))`
`+        self.send("Results of ANOVA", anova_results)`
`+        #print 'AAA', anova_results.classNames`
`+        self.progressBarSet(90)`
`+        self.selection = [None]*3`
`+        for indx in range(3):`
`+            self.geneselection(indx)`
`+        self.progressBarFinished()`
`+`
`+    def senddata(self):`
`+        self.send("Gene Selection", self.match)`
`+        n = self.match.count(1)`
`+        if n==len(self.data[0][1][0]): # all genes match`
`+            self.send("Selected Data", self.data)`
`+            self.send("Other Data", None)`
`+        elif n==0:`
`+            self.send("Selected Data", None)`
`+            self.send("Other Data", self.data)`
`+        else:`
`+            print 'processing'`
`+            P = []; N = []`
`+            for (strainname, data) in self.data:`
`+                dataP = [d.select(self.match) for d in data]`
`+                dataN = [d.select(self.match, negate=1) for d in data]`
`+                for i in range(len(data)):`
`+                    dataP[i].name = dataN[i].name = data[i].name`
`+                P.append((strainname, dataP))`
`+                N.append((strainname, dataN))`
`+            self.send("Selected Data", P)`
`+            self.send("Other Data", N)`
`+`
`+if __name__=="__main__":`
`+    a=QApplication(sys.argv)`
`+    ow=OWChipANOVA()`
`+    a.setMainWidget(ow)`
`+    ow.show()`
`+    a.exec_loop()`
`+    ow.saveSettings()`

# OWChipDataFiles.py

`+"""`
`+<name>Chip Data Files</name>`
`+<description>Loads microarray data sets from designated directory.</description>`
`+<category>Genomics</category>`
`+<icon>icons/ChipDataFiles.png</icon>`
`+<priority>1100</priority>`
`+"""`
`+`
`+from OWWidget import *`
`+import OWGUI`
`+import os, os.path, orange`
`+`
`+class ChipData(orange.Orange):`
`+    pass`
`+`
`+class Mico(orange.Orange):`
`+    pass`
`+`
`+class myCheckListItem(QCheckListItem):`
`+    def __init__(self, *args):`
`+        self.callback = None`
`+        QCheckListItem.__init__(self, *args)`
`+        self.setOn(1)`
`+`
`+    def stateChange(self, b):`
`+        if self.callback:`
`+            self.callback()`
`+`
`+class OWChipDataFiles(OWWidget):`
`+    settingsList  = ["recentDirs","selectedDirName", "applyOnChange"]`
`+`
`+    def __init__(self, parent=None, name='Chip Data Files'):`
`+        OWWidget.__init__(self, parent, name, "ChipData loads microarray data sets from designated directory.")`
`+`
`+        self.callbackDeposit = []`
`+`
`+        self.inputs = []`
`+        self.outputs = [("Structured Chip Data", ChipData)]`
`+`
`+        self.chipdata = None; self.datasets = None`
`+        # Settings`
`+        self.recentDirs=[]`
`+        self.selectedDirName = "None"`
`+        self.applyOnChange = 0`
`+        self.loadSettings()`
`+        print 'ddd (1)', self.recentDirs`
`+`
`+        # GUI`
`+        # CONTROLS`
`+        box = QHGroupBox("Directory", self.controlArea)`
`+        self.dircombo=QComboBox(box)`
`+        self.dircombo.setMinimumWidth(250)`
`+        button = OWGUI.button(box, self, '...', callback = self.browseDirectory, disabled=0)`
`+        button.setMaximumWidth(25)`
`+`
`+        # info`
`+        box = QVGroupBox("Info", self.controlArea)`
`+        self.infoa = QLabel('No data loaded.', box)`
`+        self.infob = QLabel('', box)`
`+            `
`+        # Output`
`+        out = QVGroupBox("Output", self.controlArea)`
`+        OWGUI.checkBox(out, self, 'applyOnChange', 'Commit data on selection change')`
`+        self.commitBtn = OWGUI.button(out, self, "Commit", callback=self.sendData, disabled=1)`
`+`
`+        # connecting GUI to code`
`+        self.connect(self.dircombo,SIGNAL('activated(int)'),self.selectDir)`
`+        self.resize(500,200)`
`+            `
`+        # LIST VIEW`
`+        self.layout=QVBoxLayout(self.mainArea)`
`+        self.splitter = QSplitter(QSplitter.Vertical, self.mainArea)`
`+        self.layout.add(self.splitter)`
`+`
`+        self.tree = QListView(self.splitter)`
`+        self.tree.setAllColumnsShowFocus(1)`
`+        self.tree.addColumn('Directory/Data File')`
`+        self.tree.setColumnWidth(0, 300)`
`+        self.tree.setColumnWidthMode(0, QListView.Manual)`
`+        self.tree.setColumnAlignment(0, QListView.AlignLeft)`
`+            `
`+        self.recentDirs=filter(os.path.exists,self.recentDirs)`
`+        self.setDirlist()`
`+        self.dircombo.setCurrentItem(0)`
`+`
`+        if self.recentDirs!=[]:`
`+            self.loadData(self.recentDirs[0])`
`+        print 'ddd (2)', self.recentDirs`
`+`
`+        self.commitBtn = OWGUI.button(box, self, "Test", callback=self.test)`
`+            `
`+    def test(self):`
`+        print 'ddd (tst)', self.recentDirs`
`+        print 'ddd (tst) type', type(self.recentDirs), type(self.recentDirs[0])`
`+        `
`+    def setFileTree(self):`
`+        self.tree.clear()`
`+        self.listitems = []`
`+        for d in self.chipdata:`
`+            (dirname, files) = d`
`+            diritem = myCheckListItem(self.tree, dirname, QCheckListItem.CheckBox)`
`+            diritem.callback = self.selectionChanged`
`+            self.listitems.append(diritem)`
`+            diritem.setOpen(1)`
`+            diritem.name = dirname`
`+            for f in files:`
`+                item = myCheckListItem(diritem, f.name, QCheckListItem.CheckBox)`
`+                item.callback = self.selectionChanged`
`+                self.listitems.append(item)`
`+                item.data = f`
`+                `
`+    def selectionChanged(self):`
`+        if self.applyOnChange and self.okToCommit:`
`+            self.sendData()`
`+`
`+    # checks which data has been selected, builds a chip data structure, and sends it out`
`+    def sendData(self):`
`+        data = []`
`+        dir = self.tree.firstChild()`
`+        while dir:`
`+            if dir.isOn():`
`+                files = []`
`+                f = dir.firstChild()`
`+                while f:`
`+                    if f.isOn():`
`+                        files.append(f.data)`
`+                    f = f.nextSibling()`
`+                if len(files):`
`+                    data.append((dir.name, files))`
`+            dir = dir.nextSibling()`
`+        self.send("Structured Chip Data", data)`
`+`
`+    # Loads the chip data from a root directory, sends the data to the output channels`
`+    def loadData(self, root):`
`+        self.okToCommit = 0`
`+        if root == "(none)":`
`+            self.send("Structured Chip Data", None)`
`+        chipdata = [] # structured [(dirname0, [d00, d01, ...]), ...]`
`+        datasets = []  # flat list containing all the data sets`
`+        dirs = os.listdir(root)`
`+        for d in dirs:`
`+            dirname = root+'\\'+d`
`+            if os.path.isdir(dirname):`
`+                dirdata = []   `
`+                files  = os.listdir(dirname)`
`+                for f in files:`
`+                    name, ext = os.path.splitext(f)`
`+                    if ext in ['.tab', '.txt', '.data']:`
`+                        try:`
`+                            data = None`
`+                            data = orange.ExampleTable(dirname+'\\'+f)`
`+                            data.name = name`
`+                            dirdata.append(data)`
`+                        except orange.KernelException:`
`+                            print 'eee Exception, file not in appropriate format'`
`+                if len(dirdata):`
`+                    chipdata.append((os.path.split(dirname)[1], dirdata))`
`+                    datasets = datasets + dirdata`
`+`
`+        self.commitBtn.setEnabled(len(chipdata))`
`+        if len(chipdata):`
`+            self.chipdata = chipdata`
`+            self.datasets = datasets`
`+`
`+##            self.addDirToList(root)`
`+##            self.selectedDirName = root`
`+`
`+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(self.chipdata), len(self.datasets)))`
`+            d = self.datasets[0]`
`+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))`
`+            `
`+            self.setFileTree()`
`+            self.okToCommit = 1`
`+            self.sendData()`
`+`
`+    # displays a file dialog and selects a directory`
`+    def browseDirectory(self):`
`+        if len(self.recentDirs):`
`+            startdir=os.path.split(self.recentDirs[0][:-1])[0]`
`+        else:`
`+            startdir ="."`
`+        dirname=str(QFileDialog.getExistingDirectory(startdir, None, '', 'Microarray Data Directory', 1))`
`+        if len(dirname):`
`+            self.loadData(str(dirname))`
`+            self.addDirToList(dirname) # XXX do this only if loadData successfull`
`+`
`+    def setDirlist(self):`
`+        self.dircombo.clear()`
`+        if len(self.recentDirs):`
`+            for dir in self.recentDirs:`
`+                print 'DDD', dir`
`+                (upperdir,dirname)=os.path.split(dir[:-1]) #:-1 removes the trailing '\'`
`+                print '---'`
`+                #leave out the path`
`+                self.dircombo.insertItem(dirname)`
`+        else:`
`+            self.dircombo.insertItem("(none)")`
`+        self.dircombo.adjustSize() #doesn't work properly :(`
`+`
`+    def addDirToList(self, dir):`
`+        # Add a directory to the start of the file list. `
`+        # If it exists, move it to the start of the list`
`+        print 'ddd (4)', self.recentDirs`
`+        if dir in self.recentDirs:`
`+            self.recentDirs.remove(dir)`
`+        self.recentDirs.insert(0, str(dir))`
`+        self.setDirlist()`
`+        print 'ddd (5)', self.recentDirs`
`+        self.selectedDirName = dir`
`+`
`+    # called when user makes a selection from the drop-down menu`
`+    def selectDir(self, n):`
`+        if self.recentDirs:`
`+            self.loadData(self.recentDirs[n])`
`+            self.addDirToList(self.recentDirs[n])`
`+        else:`
`+            self.loadData("(none)")`
`+`
`+if __name__=="__main__":`
`+    a=QApplication(sys.argv)`
`+    ow=OWChipDataFiles()`
`+    a.setMainWidget(ow)`
`+`
`+    ow.show()`
`+    a.exec_loop()`
`+    ow.saveSettings()`
`+    print 'ddd', ow.recentDirs`

# OWChipPostHocANOVA.py

`+"""`
`+<name>Post Hoc ANOVA on Chip Data</name>`
`+<description>Post Hoc ANOVA on chip data (strains, replicas).</description>`
`+<category>Genomics</category>`
`+<icon>icons/ChipPostHocANOVA.png</icon>`
`+<priority>1130</priority>`
`+"""`
`+`
`+from OWWidget import *`
`+import OWGUI`
`+from OWChipDataFiles import ChipData`
`+from OWChipANOVA import ANOVAResults, GeneSelection`
`+import chipstat`
`+`
`+class OWChipPostHocANOVA(OWWidget):`
`+    settingsList  = ['commitOnChange', 'f1', 'f2', 'p1', 'p2', 'p3', 'filter1', 'filter2', 'filter3']`
`+`
`+    def __init__(self, parent=None, name='ANOVA on Chip Data'):`
`+        OWWidget.__init__(self, parent, name, "ANOVA on chip data (strains, replicas).")`
`+        self.callbackDeposit = []`
`+`
`+        self.inputs = [("Results of ANOVA", ANOVAResults, self.anovaresults, 1)]`
`+        self.outputs = [("PostHoc Gene Selection", GeneSelection)]`
`+`
`+        self.commitOnChange = 0`
`+        self.anova = None`
`+`
`+        self.p1, self.p2, self.p3 = (2, 2, 2)`
`+        self.filter1, self.filter2, self.filter3 = (1, 0, 0)`
`+        self.f1 = None; self.f2 = None`
`+        # Settings`
`+        self.loadSettings()`
`+        self.ps = None`
`+        self.selection = [None]*3`
`+`
`+        # GUI`
`+        # info`
`+        box = QVGroupBox("Info", self.controlArea)`
`+        self.infoa = QLabel('No data on input.', box)`
`+        OWGUI.separator(self.controlArea)`
`+`
`+        # factors`
`+        box = QVGroupBox("Factors", self.controlArea)`
`+        self.f1Combo = OWGUI.comboBox(box, self, 'f1', sendSelectedValue=1, callback=[self.setFactorCombos, self.analysis], label="First:", labelWidth=50, orientation="horizontal")`
`+        self.f1Combo.setDisabled(1)`
`+        self.f2Combo = OWGUI.comboBox(box, self, 'f2', sendSelectedValue=1, callback=[self.setFactorCombos, self.analysis], label="Second:", labelWidth=50, orientation="horizontal")`
`+        self.f2Combo.setDisabled(1)`
`+`
`+        # gene selection`
`+        self.selectionBox = QVGroupBox("Gene Selection", self.controlArea)`
`+        self.factors = [('First factor (time)', 'p1', 'filter1'),`
`+                   ('Second factor (strain)', 'p2', 'filter2'),`
`+                   ('Interaction factor (time*strain)', 'p3', 'filter3')]`
`+        self.pvals = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5]`
`+`
`+        self.numgenes = []`
`+        for i in range(3):`
`+            OWGUI.checkBox(self.selectionBox, self, self.factors[i][2], self.factors[i][0], callback=self.finalselection)`
`+            hbox = QHBox(self.selectionBox)`
`+            lbl = QLabel('', hbox)`
`+            lbl.setFixedSize(20, lbl.sizeHint().height())`
`+            lbl = QLabel('p <', hbox)`
`+            lbl.setFixedSize(25, lbl.sizeHint().height())`
`+            OWGUI.comboBox(hbox, self, self.factors[i][1], items = self.pvals, callback=lambda x=i: self.geneselection(x))`
`+            self.numgenes.append(QLabel('  (0 genes)', hbox))`
`+        self.selectionLbl = QLabel('Total of 0 genes selected', self.selectionBox)`
`+        self.selectionBox.setDisabled(1)`
`+`
`+        # output`
`+        box = QVGroupBox("Output", self.controlArea)`
`+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')`
`+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)`
`+`
`+        self.resize(200,100)`
`+`
`+    def setFactorCombos(self):`
`+`
`+        def setOneCombo(combo, exclude):`
`+            combo.clear()`
`+            labels = [x for x in self.anova.classNames]`
`+            if exclude in self.anova.classNames:`
`+                labels.remove(exclude)`
`+            for l in labels:`
`+                combo.insertItem(l)`
`+`
`+        items = self.anova.classNames`
`+        if not self.f1 in items:`
`+            self.f1 = items[items[0]==self.f2]`
`+        if not self.f2 in items:`
`+            self.f2 = items[items[0]==self.f2]`
`+`
`+        setOneCombo(self.f1Combo, self.f2)`
`+        setOneCombo(self.f2Combo, self.f1)`
`+        self.f1 = self.f1 # this looks stupid, but it's not:`
`+        self.f2 = self.f2 # it sets the right item to the combo boxes`
`+`
`+    def anovaresults(self, anova):`
`+        self.anova = anova`
`+        self.selectionBox.setEnabled(anova <> None)`
`+        self.commitBtn.setEnabled(anova <> None)`
`+        if anova:`
`+            self.setFactorCombos()`
`+            self.f1Combo.setEnabled(1)`
`+            self.f2Combo.setEnabled(1)`
`+            self.analysis(commit=1)`
`+            self.infoa.setText('ANOVA results on %d strains' % len(self.anova.classNames))`
`+        else:`
`+            self.send("PostHoc Gene Selection", None)`
`+            self.f1Combo.clear()`
`+            self.f1Combo.setDisabled(1)`
`+        `
`+`
`+    def geneselection(self, indx, commit=0):            `
`+        margin = self.pvals[getattr(self, self.factors[indx][1])]`
`+        p = [x[indx] for x in self.ps]`
`+        n = len(filter(lambda x: x<margin, p))`
`+        self.numgenes[indx].setText('  (%d %s)' % (n, ['genes', 'gene'][n==1]))`
`+        self.selection[indx] = map(lambda x: x<margin, p)`
`+        for x in self.selection:`
`+            if x==None: return`
`+        self.finalselection(commit)`
`+`
`+    def finalselection(self, commit=0):`
`+        if not self.ps:`
`+            return`
`+        self.match = [0]*len(self.ps)`
`+        for indx in range(3):`
`+            if getattr(self, self.factors[indx][2]) and self.selection[indx]:`
`+                self.match = map(lambda a,b: a or b, self.match, self.selection[indx])`
`+        n = self.match.count(1)`
`+        print self.match`
`+        self.selectionLbl.setText('Total of %d %s match criteria' % (n, ['genes', 'gene'][n==1]))`
`+        if self.commitOnChange or commit:`
`+            self.senddata()`
`+`
`+    def analysis(self, commit=0):`
`+        self.progressBarInit()`
`+        if not self.anova:`
`+            return`
`+        pbStep = 90./len(self.anova.anovaList)`
`+        self.ps = chipstat.posthoc_anova_on_genes(self.f1, self.f2, self.anova, callback=lambda: self.progressBarAdvance(pbStep))`
`+        self.progressBarSet(90)`
`+        self.selection = [None]*3`
`+        for indx in range(3):`
`+            self.geneselection(indx, commit=1)`
`+        self.progressBarFinished()`
`+`
`+    def senddata(self):`
`+        self.send("PostHoc Gene Selection", self.match)`
`+`
`+if __name__=="__main__":`
`+    a=QApplication(sys.argv)`
`+    ow=OWChipPostHocANOVA()`
`+    a.setMainWidget(ow)`
`+    ow.show()`
`+    a.exec_loop()`
`+    ow.saveSettings()`

# OWHeatMap.py

` from OWWidget import *`
` from OWOptions import *`
` from qwt import *`
`+from OWChipDataFiles import ChipData`
`+`
`+from OWChipANOVA import ANOVAResults`
` `
` ##############################################################################`
` # parameters that determine the canvas layout`
`         self.callbackDeposit = [] # deposit for OWGUI callback functions`
`         OWWidget.__init__(self, parent, name, 'Microarray Heat Map', FALSE, FALSE) `
`         `
`-        self.inputs = [("Examples", ExampleTable, self.dataset, 0)]`
`+        self.inputs = [("Examples", ExampleTable, self.dataset, 0), ("Structured Chip Data", ChipData, self.chipdata, 1)]`
`         self.outputs = [("Examples", ExampleTable), ("Classified Examples", ExampleTableWithClass)]`
` `
`         #set default settings`
`         self.LegendOnTop = 1           # legend stripe on top (bottom)?`
`         self.LegendOnBottom = 1`
`         self.ShowGroupLabel = 1        # show class names in case of classified data?`
`-        self.ShowAverageStripe = 1     # show the stripe with the evarage`
`+        self.ShowAverageStripe = 0     # show the stripe with the evarage`
`         self.MaintainArrayHeight = 1   # adjust cell height while changing the merge factor`
`         self.BShowballoon = 1          # balloon help`
`         self.ShowGeneAnnotations = 1   # show annotations for genes`
`         box = QVButtonGroup("Annotation && Legends", tab)`
`         OWGUI.checkBox(box, self, 'LegendOnTop', 'Show legend', callback=self.drawHeatMap)`
`         OWGUI.checkBox(box, self, 'ShowAverageStripe', 'Stripes with averages', callback=self.drawHeatMap)`
`-        OWGUI.checkBox(box, self, 'ShowGeneAnnotations', 'Gene annotations', callback=self.drawHeatMap)`
`+        self.geneAnnotationsCB = OWGUI.checkBox(box, self, 'ShowGeneAnnotations', 'Gene annotations', callback=self.drawHeatMap)`
`         `
`         self.annotationCombo = OWGUI.comboBox(box, self, "BAnnotationIndx", items=[], callback=lambda x='BAnnotationVar', y='BAnnotationIndx': self.setMetaID(x, y))`
` `
`         OWGUI.checkBox(box, self, 'BShowballoon', "Show balloon", callback=lambda: self.balloonInfoBox.setDisabled(not self.BShowballoon))`
`         box = QVButtonGroup("Balloon Info", tab)`
`         OWGUI.checkBox(box, self, 'BShowColumnID', "Column ID")`
`-        OWGUI.checkBox(box, self, 'BShowSpotIndex', "Spot Index", callback=lambda: self.spotCombo.setDisabled(not self.BShowSpotIndex))`
`+        self.spotIndxCB = OWGUI.checkBox(box, self, 'BShowSpotIndex', "Spot Index", callback=lambda: self.spotCombo.setDisabled(not self.BShowSpotIndex))`
`         self.spotCombo = OWGUI.comboBox(box, self, "BSpotIndx", items=[], callback=lambda x='BSpotVar', y='BSpotIndx': self.setMetaID(x, y))`
`         OWGUI.checkBox(box, self, 'BShowGeneExpression', "Gene expression")`
`         OWGUI.checkBox(box, self, 'BShowAnnotation', "Annotation")`
`         `
`     # any time the data changes, the two combo boxes showing meta attributes`
`     # have to be adjusted`
`-    def setMetaCombo(self, cb, value, enabled=1, default=None): `
`+    def setMetaCombo(self, cb, value, enabled=1, default=None):`
`         cb.clear()`
`         if len(self.meta)==0:`
`             cb.setDisabled(True)`
`-            return (None, None, None)`
`+            self.spotIndxCB.setDisabled(1); self.geneAnnotationsCB.setDisabled(1)`
`+            return (None, None)`
`         cb.setDisabled(not enabled)`
`+        self.spotIndxCB.setEnabled(1); self.geneAnnotationsCB.setEnabled(1)`
`         for m in self.meta:`
`             cb.insertItem(m)`
`         `
`         if not (value in self.meta):`
`-            value = default`
`+            if default in self.meta:`
`+                value = default`
`+            else:`
`+                value = None`
` `
`         if value in self.meta:`
`             cb.setCurrentItem(self.meta.index(value))`
` `
`     def setMetaCombos(self):`
`         self.meta = [m.name for m in self.data[0].domain.getmetas().values()]`
`-        (self.BSpotVar, self.BSpotIndx) = self.setMetaCombo(self.spotCombo, self.BSpotVar, enabled=self.BShowSpotIndex, default='RMI')`
`-        (self.BAnnotationVar, self.BAnnotationIndx) = self.setMetaCombo(self.annotationCombo, self.BAnnotationVar, enabled=self.BShowAnnotation, default='annotation')`
`+        self.BSpotVar, self.BSpotIndx = self.setMetaCombo(self.spotCombo, self.BSpotVar, enabled=self.BShowSpotIndex, default='RMI')`
`+        self.BAnnotationVar, self.BAnnotationIndx = self.setMetaCombo(self.annotationCombo, self.BAnnotationVar, enabled=self.BShowAnnotation, default='xannotation')`
` `
`     ##########################################################################`
`     # handling of input/output signals`
` `
`     def dataset(self, data, id):`
`+        dataIDs = [d.id for d in self.data]`
`         if not data:`
`-            # should remove the data where necessary`
`-            return`
`-        # check if the same length`
`-        if data.domain.classVar:`
`-            domain = self.checkDomain(data)`
`-            if domain:`
`-                data = orange.ExampleTable(domain, data)`
`-##                print 'changed!'`
`-##        print 'len=', len(data)`
`-##        if data.domain.classVar:`
`-##            print 'class', len(data.domain.classVar.values)`
`-##        else:`
`-##            print 'no class'`
`-        data.setattr("id", id)`
`-        if len(self.data) and (id in [d.id for d in self.data]):`
`-            for i in range(len(self.data)):`
`-                if id==self.data[i].id:`
`-                    self.data[i] = data`
`-                    self.fileLB.changeItem(data.name, i)`
`-                    break`
`+            if id in dataIDs:`
`+                del self.data[dataIDs.index(id)]`
`         else:`
`-            self.data.append(data)`
`-            self.fileLB.insertItem(data.name)`
`+            # check if the same length`
`+            if data.domain.classVar:`
`+                domain = self.checkDomain(data)`
`+                if domain:`
`+                    data = orange.ExampleTable(domain, data)`
`+            data.setattr("id", id)`
`+            if len(self.data) and (id in dataIDs):`
`+                for i in range(len(self.data)):`
`+                    if id==self.data[i].id:`
`+                        self.data[i] = data`
`+                        self.fileLB.changeItem(data.name, i)`
`+                        break`
`+            else:`
`+                self.data.append(data)`
`+                self.fileLB.insertItem(data.name)`
` `
`-        if (self.data <> None and len(self.data) > 1):`
`-            self.tabs.setTabEnabled(self.filesTab, 1)`
`-        else:`
`-            self.tabs.setTabEnabled(self.filesTab, 0)`
`+            if len(self.data) > 1:`
`+                self.tabs.setTabEnabled(self.filesTab, 1)`
`+            else:`
`+                self.tabs.setTabEnabled(self.filesTab, 0)`
`+            self.setMetaCombos() # set the two combo widgets according to the data`
`             `
`         self.send('Classified Examples', None)`
`         self.send('Examples', None)`
`-        self.setMetaCombos() # set the two combo widgets according to the data`
`         self.constructHeatmap()`
`+        self.canvas.update()`
` `
`+    def chipdata(self, data):`
`+        print 'CHIPDATA'`
`+        self.data = [] # XXX should only remove the data from the same source, use id in this rutine`
`+        if not data:`
`+            for i in self.canvas.allItems():`
`+                i.setCanvas(None)`
`+            self.canvas.update()`
`+            return`
`+        indx = 0`
`+        for (strainname, ds) in data:`
`+            for d in ds:`
`+                self.dataset(d, indx)`
`+                indx += 1`
`+        self.createHeatMap()`
`+        self.canvas.update()`
`+        `
`     def constructHeatmap(self):`
`         self.heatmapconstructor = []`
`-        if self.SortGenes:`
`-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0]))`
`-        else:`
`-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0], None))`
`-        for i in range(len(self.data))[1:]:`
`-            self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[i], self.heatmapconstructor[0]))`
`+        if len(self.data):`
`+            if self.SortGenes:`
`+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0]))`
`+            else:`
`+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[0], None))`
`+            for i in range(len(self.data))[1:]:`
`+                self.heatmapconstructor.append(orange.HeatmapConstructor(self.data[i], self.heatmapconstructor[0]))`
`         self.createHeatMap()`
` `
`     # remove unused values from the class of the data set`
`         cl = clo = data.domain.classVar`
`         if cl:`
`             if selection:`
`-##                print 'selection'`
`                 cl = orange.RemoveUnusedValues(cl, selection, removeOneValued = 1)`
`-##                if cl == clo:`
`-##                    print 'same'`
`             else:`
`                 cl = orange.RemoveUnusedValues(cl, data, removeOneValued = 1)`
` `
`         for (g,s,e) in rows:`
`             hm = self.heatmaps[0][g]`
`             ex += hm.examples[hm.exampleIndices[s] : hm.exampleIndices[e+1]]`
`-            print 'sss', s, e`
` `
`         # Reduce the number of class values, if class is defined`
`         newdomain = self.checkDomain(self.data[0], selection=ex)`
`         if not newdomain:`
`-##            print 'retain'`
`             newdomain = self.data[0].domain`
`         selectedData = orange.ExampleTable(newdomain, ex)`
`         if selectedData.domain.classVar:`
`         pass`
` `
`     def fileOrderChange(self, chg):`
`-##        print 'ccc', chg`
`         if chg==-1 and self.selectedFile>0:`
`             switchFiles(self.selectedFile, self.selectedFile-1)`
`         if chg==1  and self.selectedFile < len(self.data - 1):`
` `
`         # annotate`
`         hm = self.heatmaps[0][group]`
`-        for (row, indices) in enumerate(hm.exampleIndices[:-1]):`
`-            t = QCanvasText(str(hm.examples[hm.exampleIndices[row]][self.BAnnotationVar]), self.canvas)`
`-            t.setFont(font)`
`-            t.setX(x); t.setY(y)`
`-            t.show()`
`-            y += self.CellHeight`
`+        if self.BAnnotationVar:`
`+            for (row, indices) in enumerate(hm.exampleIndices[:-1]):`
`+                t = QCanvasText(str(hm.examples[hm.exampleIndices[row]][self.BAnnotationVar]), self.canvas)`
`+                t.setFont(font)`
`+                t.setX(x); t.setY(y)`
`+                t.show()`
`+                y += self.CellHeight`
` `
`     def drawHeatMap(self):`
`-        print "DRAW HEATMAP"`
`+        # remove everything from current canvas`
`+        for i in self.canvas.allItems():`
`+            i.setCanvas(None)`
`+        if not len(self.data):`
`+            return`
`+`
`         lo = self.CutEnabled and self.CutLow   or self.lowerBound`
`         hi = self.CutEnabled and self.CutHigh  or self.upperBound`
` `
`-        # remove everything from current canvas`
`-        for i in self.canvas.allItems():`
`-            i.setCanvas(None)`
`         self.canvasView.heatmapParameters(self, self.CellWidth, self.CellHeight) # needed for event handling`
` `
`         palette = self.ColorPalettes[self.CurrentPalette]`
`             bmpl = []`
`             for g in range(groups):`
`                 bmp, self.imageWidth, imageHeight = hm[g].getBitmap(int(self.CellWidth), int(self.CellHeight), lo, hi, self.Gamma)`
`-                print "HAVE BITMAP %i %i" % (self.imageWidth, imageHeight),`
`                 bmpl.append(bmp)`
`                 if not i: self.heights.append(imageHeight)`
`-                print "OK"`
`             self.bmps.append(bmpl)`
` `
`         self.canvas.resize(2000, 2000) # this needs adjustment`
`         x = c_offsetX; y0 = c_offsetY`
` `
`         self.legend = self.heatmapconstructor[0].getLegend(self.imageWidth, c_legendHeight, self.Gamma)`
`-        print "LEGEND OK"`
`         if self.LegendOnTop:`
`             y0 = self.drawLegend(x, y0, self.imageWidth, c_legendHeight, palette)`
` `
`                     y = self.drawGroupLabel(self.data[i][0].domain.classVar.values[g], x, y, self.imageWidth)`
`                 if not i: self.imgStart.append(y)`
`                 ycoord.append(y)`
`-                print "DFN %i" % self.heights[g]`
`                 image = ImageItem(self.bmps[i][g], self.canvas, self.imageWidth, self.heights[g], palette, x=x, y=y, z=z_heatmap)`
`-                print "IMAGE %i" % g`
`                 image.hm = self.heatmaps[i][g] # needed for event handling`
`                 image.height = self.heights[g]; image.width = self.imageWidth`
`                 if not i: self.imgEnd.append(y+self.heights[g]-1)`
`         self.canvas.update()`
`         `
`     def createHeatMap(self):`
`-        merge = min(self.Merge, float(len(self.data[0])))`
`-        squeeze = 1. / merge`
`-        self.lowerBound = 1000; self.upperBound = -1000 # CHANGE!!!`
`-        self.heatmaps = []`
`-        for (i, hmc) in enumerate(self.heatmapconstructor):`
`-            hm, lb, ub = hmc(squeeze)`
`-            self.heatmaps.append(hm)`
`-            self.lowerBound = min(self.lowerBound, lb)`
`-            self.upperBound = max(self.upperBound, ub)`
`+        if len(self.data):`
`+            merge = min(self.Merge, float(len(self.data[0])))`
`+            squeeze = 1. / merge`
`+            self.lowerBound = 1000; self.upperBound = -1000 # CHANGE!!!`
`+            self.heatmaps = []`
`+            for (i, hmc) in enumerate(self.heatmapconstructor):`
`+                hm, lb, ub = hmc(squeeze)`
`+                self.heatmaps.append(hm)`
`+                self.lowerBound = min(self.lowerBound, lb)`
`+                self.upperBound = max(self.upperBound, ub)`
` `
`-        self.sliderCutLow.setRange(self.lowerBound, 0, 0.1)`
`-        self.sliderCutHigh.setRange(1e-10, self.upperBound, 0.1)`
`-        self.CutLow = max(self.CutLow, self.lowerBound)`
`-        self.CutHigh = min(self.CutHigh, self.upperBound)`
`-        self.sliderCutLow.setValue(self.CutLow)`
`-        self.sliderCutHigh.setValue(self.CutHigh)`
`-        self.selection.remove()`
`+            self.sliderCutLow.setRange(self.lowerBound, 0, 0.1)`
`+            self.sliderCutHigh.setRange(1e-10, self.upperBound, 0.1)`
`+            self.CutLow = max(self.CutLow, self.lowerBound)`
`+            self.CutHigh = min(self.CutHigh, self.upperBound)`
`+            self.sliderCutLow.setValue(self.CutLow)`
`+            self.sliderCutHigh.setValue(self.CutHigh)`
`+            self.selection.remove()`
`         self.drawHeatMap()`
` `
` ##################################################################################################`
`             col, row = int(x / self.dx), int(y / self.dy)`
`             # hm.getCellIntensity(row, col), hm.getRowIntensity(row)`
`             ex = hm.examples[hm.exampleIndices[row] : hm.exampleIndices[row+1]]`
`-            #print 'xxx', hm.exampleIndices[row], hm.exampleIndices[row+1]`
`             self.selector.setX(item.x()+col*self.dx-v_sel_width+1)`
`             self.selector.setY(item.y()+row*self.dy-v_sel_width+1)`
`             self.selector.show()`
`             self.bubble.head.setText(head)`
`             # bubble, construct body`
`             body = None`
`-            if self.master.BShowSpotIndex or self.master.BShowAnnotation or self.master.BShowGeneExpression:`
`+            if (self.master.BShowSpotIndex and self.master.BSpotVar) or self.master.BShowAnnotation or self.master.BShowGeneExpression:`
`                 for (i, e) in enumerate(ex):`
`                     if i>5:`
`                         body += "\n... (%d more)" % (len(ex)-5)`
`                         break`
`                     else:`
`                         s = []`
`-                        if self.master.BShowSpotIndex:`
`+                        if self.master.BShowSpotIndex and self.master.BSpotVar:`
`                             s.append(str(e[self.master.BSpotVar]))`
`                         if self.master.BShowGeneExpression:`
`                             s.append(str(e[col]))`
`-                        if self.master.BShowAnnotation:`
`+                        if self.master.BShowAnnotation and self.master.BAnnotationVar:`
`                             s.append(str(e[self.master.BAnnotationVar]))`
`                     if body: body += "\n"`
`                     else: body=""`
`     a.setMainWidget(ow)`
` `
`     ow.show()`
`-##    data = orange.ExampleTable('wt-large')`
` ##    d = orange.ExampleTable('wt'); d.name = 'wt'`
`-    d = orange.ExampleTable('wtclassed'); d.name = 'wtclassed'`
`-    ow.dataset(d, 0)`
`+    d = orange.ExampleTable('wt-nometa'); d.name = 'wt'`
`+    ow.dataset(d, 2)`
`+    d = orange.ExampleTable('wt-nometa'); d.name = 'wt'`
`+    ow.dataset(d, 1)`
`+    ow.dataset(None, 1)`
`+    ow.dataset(None, 2)`
` ##    d = orange.ExampleTable('wt2'); d.name = 'wt2'`
` ##    ow.dataset(d, 1)`
` ##    d = orange.ExampleTable('wt3'); d.name = 'wt3'`

# OWProcessChipData.py

`+"""`
`+<name>Process Chip Data</name>`
`+<description>Pre- and post-processing of chip data.</description>`
`+<category>Genomics</category>`
`+<icon>icons/ProcessChipData.png</icon>`
`+<priority>1110</priority>`
`+"""`
`+`
`+from OWWidget import *`
`+import OWGUI`
`+from OWChipDataFiles import ChipData`
`+import chipstat`
`+`
`+class OWProcessChipData(OWWidget):`
`+    settingsList  = ["preStdMethod", "postStdMethod", "preStdRob", "postStdRob", "mergeType", "commitOnChange"]`
`+`
`+    def __init__(self, parent=None, name='Chip Data Files'):`
`+        OWWidget.__init__(self, parent, name, "Process Chip Data pre- and post-processes chip data.")`
`+        self.callbackDeposit = []`
`+`
`+        self.inputs = [("Structured Chip Data", ChipData, self.chipdata)]`
`+        self.outputs = [("Structured Chip Data", ChipData)]`
`+`
`+        self.chipdata = None; self.datasets = None`
`+        self.std = [("No preprocessing", None),`
`+                    ("Array-based standardization", chipstat.standardize_arrays),`
`+                    ("Gene-based standardization", chipstat.standardize_genes),`
`+                    ("First array-, then gene-based standardization", lambda e,r: chipstat.standardize_genes(chipstat.standardize_arrays(e,r),r)),`
`+                    ("First gene-, then array-based standardization", lambda e,r: chipstat.standardize_arrays(chipstat.standardize_genes(e,r),r))]`
`+        # Settings`
`+        self.data = None`
`+        self.preStdMethod = 0; self.preStdRob = 1`
`+        self.postStdMethod = 0; self.postStdRob = 1`
`+        `
`+        self.mergeType = 0`
`+        self.commitOnChange = 0`
`+        self.loadSettings()`
`+`
`+        # GUI`
`+        # info`
`+        box = QVGroupBox("Info", self.controlArea)`
`+        self.infoa = QLabel('No data on input.', box)`
`+        self.infob = QLabel('', box)`
`+        `
`+        # preprocessing`
`+        OWGUI.separator(self.controlArea)`
`+        box = QVGroupBox("Preprocessing", self.controlArea)`
`+        labels = [x[0] for x in self.std]`
`+        OWGUI.comboBox(box, self, 'preStdMethod', label=None, labelWidth=None, orientation='vertical', items=labels, callback=self.selectionChange)`
`+        self.preRobBtn = OWGUI.checkBox(box, self, "preStdRob", "Robust standardization", callback=self.selectionChange)`
`+        `
`+        # merge`
`+        OWGUI.separator(self.controlArea)`
`+        self.mergeTypes = [('mean', 'Mean'), ('median', 'Median'), ('min', 'Minimum expression'), ('max', 'Maximum expression')]`
`+        labels = [x[1] for x in self.mergeTypes]`
`+        OWGUI.radioButtonsInBox(self.controlArea, self, 'mergeType', labels, box='Merge Replicas', tooltips=None, callback=self.selectionChange)`
`+`
`+        # postprocessing`
`+        OWGUI.separator(self.controlArea)`
`+        box = QVGroupBox("Postprocessing", self.controlArea)`
`+        labels = [x[0] for x in self.std]`
`+        OWGUI.comboBox(box, self, 'postStdMethod', label=None, labelWidth=None, orientation='vertical', items=labels, callback=self.selectionChange)`
`+        self.postRobBtn = OWGUI.checkBox(box, self, "postStdRob", "Robust standardization", callback=self.selectionChange)`
`+`
`+        # output`
`+        OWGUI.separator(self.controlArea)`
`+        box = QVGroupBox("Output", self.controlArea)`
`+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')`
`+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.selectionChange, disabled=1)`
`+`
`+        self.setBtnsState()`
`+        self.resize(100,100)`
`+`
`+    def selectionChange(self):`
`+        self.setBtnsState()`
`+        if self.commitOnChange:`
`+            self.sendData()`
`+`
`+    def chipdata(self, data):`
`+        self.commitBtn.setEnabled(data <> None)`
`+        if data:`
`+            self.data = data`
`+            nfiles = 0`
`+            for (n, d) in data:`
`+                nfiles += len(d)`
`+            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))`
`+            print data`
`+            d = data[0][1][0]`
`+            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))`
`+`
`+            self.sendData()`
`+        else:`
`+            self.send("Structured Chip Data", None)`
`+`
`+    # process arrays in the structure, returns new structure`
`+    def processArrays(self, datastructure, method, *arg):`
`+        pbStep = 30. / sum([len(data) for (dummy, data) in datastructure])`
`+        newdata = []`
`+        for (strainname, data) in datastructure:`
`+            if data:`
`+                new = []`
`+                for e in data:`
`+                    new.append(apply(method, (e,)+arg))`
`+                    self.progressBarAdvance(pbStep)`
`+                newdata.append([strainname, new])`
`+        return newdata`
`+`
`+    def sendData(self):`
`+        if not self.data:`
`+            return`
`+        self.send('Structured Chip Data', None) # this is required for other widgets not to mess up with two different datasets`
`+        self.progressBarInit()`
`+        data = self.data`
`+        # preprocessing`
`+        if self.preStdMethod: # 0 means no preprocessing`
`+            data = self.processArrays(data, self.std[self.preStdMethod][1], self.preStdRob)`
`+        self.progressBarSet(30)`
`+        # merging`
`+        merged = chipstat.merge_replicas(data, self.mergeTypes[self.mergeType][0])`
`+        self.progressBarSet(70)`
`+        # postprocessing`
`+        if self.postStdMethod: # 0 means no preprocessing`
`+            merged = self.processArrays(merged, self.std[self.postStdMethod][1], self.preStdRob)`
`+        for (i,d) in enumerate(merged):`
`+            d[1][0].name = self.data[i][0]`
`+        self.progressBarFinished()`
`+        self.send('Structured Chip Data', merged)`
`+            `
`+    def setBtnsState(self):`
`+        self.preRobBtn.setEnabled(self.preStdMethod > 0)`
`+        self.postRobBtn.setEnabled(self.postStdMethod > 0)`
`+`
`+if __name__=="__main__":`
`+    a=QApplication(sys.argv)`
`+    ow=OWProcessChipData()`
`+    a.setMainWidget(ow)`
`+`
`+    ow.show()`
`+    a.exec_loop()`
`+    ow.saveSettings()`

# OWSelectGenes.py

`+"""`
`+<name>Select Genes</name>`
`+<description>Select genes from chip based on input selectors.</description>`
`+<category>Genomics</category>`
`+<icon>icons/SelectGenes.png</icon>`
`+<priority>1150</priority>`
`+"""`
`+`
`+from OWWidget import *`
`+import OWGUI`
`+from OWChipDataFiles import ChipData`
`+from OWChipANOVA import GeneSelection`
`+import chipstat`
`+`
`+class OWSelectGenes(OWWidget):`
`+    settingsList  = ['negate', 'commitOnChange']`
`+`
`+    def __init__(self, parent=None, name='Select Genes'):`
`+        OWWidget.__init__(self, parent, name, "Select genes from chip based on input selectors")`
`+        self.callbackDeposit = []`
`+`
`+        self.inputs = [("Gene Selection", GeneSelection, self.loadselection, 0), ("Structured Chip Data", ChipData, self.chipdata, 1)]`
`+        self.outputs = [("Selected Data", ChipData), ("Other Data", ChipData), ("Gene Selection", GeneSelection)]`
`+`
`+        self.negate = 1`
`+        self.selectors = {}`
`+        self.data = None`
`+        self.commitOnChange = 1`
`+        # Settings`
`+        self.loadSettings()`
`+`
`+        # GUI`
`+        # info`
`+        box = QVGroupBox("Info", self.controlArea)`
`+        self.infoa = QLabel('No data on input.', box)`
`+        self.infob = QLabel('', box)`
`+        OWGUI.separator(self.controlArea)`
`+`
`+        # gene selection`
`+        box = QVGroupBox("Gene Selection", self.controlArea)`
`+        OWGUI.checkBox(box, self, 'negate', 'Negate', callback = self.selectionChange)`
`+`
`+        # output`
`+        box = QVGroupBox("Output", self.controlArea)`
`+        OWGUI.checkBox(box, self, 'commitOnChange', 'Commit data on selection change')`
`+        self.commitBtn = OWGUI.button(box, self, "Commit", callback=self.senddata, disabled=1)`
`+`
`+        self.resize(200,100)`
`+        `
`+    def chipdata(self, data):`
`+        if data:`
`+            self.data = data`
`+##            nfiles = 0`
`+##            for (n, d) in data:`
`+##                nfiles += len(d)`
`+##            self.infoa.setText("Microarray data, %d strains, total of %d data files" % (len(data), nfiles))`
`+##            d = data[0][1][0]`
`+##            self.infob.setText("Each data file contains %d measurements of %d genes" % (len(d.domain.attributes), len(d)))`
`+            self.senddata()`
`+        else:`
`+            self.send("Selected Data", None)`
`+            self.send("Other Data", None)`
`+            self.send("Gene Selection", None)`
`+        self.commitBtn.setEnabled(data <> None)`
`+`
`+    def loadselection(self, selector, id):`
`+        if selector:`
`+            self.selectors[id] = selector`
`+        else:`
`+            del self.selectors[id]`
`+        print 'SELN',`
`+        for s in self.selectors.values():`
`+            print len(s),`
`+        print`
`+##        print self.selectors`
`+        self.infoa.setText('%d selectors on input' % len(self.selectors))`
`+        self.senddata()`
`+        self.commitBtn.setEnabled(self.selectors <> None and len(self.selectors))`
`+`
`+    def senddata(self):`
`+        if len(self.selectors):`
`+            self.progressBarInit()`
`+            s = self.selectors.values()`
`+##            match = map(lambda *args: min(args), self.selectors.values())`
`+            # this needs to be changed!!!`
`+            match = []`
`+            n = len(s)`
`+            for i in range(len(s[0])):`
`+                mm = []`
`+                for j in range(n):`
`+                    mm.append(s[j][i])`
`+                match.append(min(mm))`
`+                `
`+            if self.negate:`
`+                match = [(1,0)[x] for x in match]`
`+            self.match = match`
`+            self.progressBarSet(20)`
`+`
`+            self.send("Gene Selection", self.match)`
`+            if self.data:`
`+                n = self.match.count(1)`
`+                if n==len(self.data[0][1][0]): # all genes match`
`+                    self.progressBarFinished()`
`+                    self.send("Selected Data", self.data)`
`+                    self.send("Other Data", None)`
`+                elif n==0:`
`+                    self.progressBarFinished()`
`+                    self.send("Selected Data", None)`
`+                    self.send("Other Data", self.data)`
`+                else:`
`+                    P = []; N = []`
`+                    pbStep = 80. / len(self.data)`
`+                    for (strainname, data) in self.data:`
`+                        dataP = [d.select(self.match) for d in data]`
`+                        dataN = [d.select(self.match, negate=1) for d in data]`
`+                        for i in range(len(data)):`
`+                            dataP[i].name = dataN[i].name = data[i].name`
`+                        P.append((strainname, dataP))`
`+                        N.append((strainname, dataN))`
`+                        self.progressBarAdvance(pbStep)`
`+                    self.send("Selected Data", P)`
`+                    self.send("Other Data", N)`
`+            self.progressBarFinished()`
`+`
`+    def selectionChange(self):`
`+        pass`
`+`
`+if __name__=="__main__":`
`+    a=QApplication(sys.argv)`
`+    ow=OWSelectGenes()`
`+    a.setMainWidget(ow)`
`+    ow.show()`
`+    a.exec_loop()`
`+    ow.saveSettings()`

# chipstat.py

`+import math`
`+import Numeric, MA`
`+import LinearAlgebra`
`+import stats`
`+import NumExtn`
`+import os, os.path`
`+import orange`
`+`
`+#########################################################################`
`+## standardization of profiles / arrays`
`+#########################################################################`
`+`
`+def standardize_arrays(et, robust=True):`
`+    """Returns a new example table where values are standardized to mean zero and unit variance columnwise.`
`+    Option: parametric (mean/stddev) or non-parametric (median/mad).`
`+    """`
`+    ma = orng2ma(et)`
`+    if robust:`
`+        return ma2orng_keepClassMetas(scaleMad(centerMed(ma)), et)`
`+    else:`
`+        return ma2orng_keepClassMetas(scaleStd(centerMean(ma)), et)`
`+    `
`+`
`+def standardize_genes(et, robust=True):`
`+    """Returns a new example table where values are standardized to mean zero and unit variance rowwise.`
`+    Option: parametric (mean/stddev) or non-parametric (median/mad).`
`+    """`
`+    ma = orng2ma(et)`
`+    if robust:`
`+        return ma2orng_keepClassMetas(scaleMad(centerMed(ma,1),1), et)`
`+    else:`
`+        return ma2orng_keepClassMetas(scaleStd(centerMean(ma,1),1), et)`
`+`
`+`
`+#########################################################################`
`+## merge replicas by mean, median, min, max`
`+#########################################################################`
`+`
`+def merge_replicas(aETStruct, type):`
`+    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]);`
`+    type = ["mean" | "median" | "min" | "max"]`
`+    """`
`+    shape = [0,0,0]`
`+    et0 = aETStruct[0][1][0]                                            # the first example table`
`+    shape[0] = len(et0)                                                 # number of examples (genes)`
`+    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)`
`+    mergedETStruct = []`
`+    if type == "mean":`
`+        merge_func = MA.average`
`+    elif type == "median":`
`+        merge_func = NumExtn.medianMA`
`+    elif type == "min":`
`+        merge_func = NumExtn.minMA`
`+    elif type == "max":`
`+        merge_func = NumExtn.maxMA`
`+    else:`
`+        raise AttributeError, "type = ['mean' | 'median' | 'min' | 'max']"`
`+    for st, etList in aETStruct:`
`+        shape[2] = len(etList)`
`+        ma3d = MA.zeros(shape, MA.Float)`
`+        for idx, et in enumerate(etList):`
`+            ma3d[:,:,idx] = orng2ma(et)`
`+        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))`
`+    return mergedETStruct`
`+        `
`+`
`+`
`+###########################################################################`
`+#### average expression profile`
`+###########################################################################`
`+##`
`+##def average_replicas(aETStruct):`
`+##    """Returns a list of tuples (strain, avrg_orngET) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).`
`+##    """`
`+##    shape = [0,0,0]`
`+##    et0 = aETStruct[0][1][0]                                            # the first example table`
`+##    shape[0] = len(et0)                                                 # number of examples (genes)`
`+##    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)`
`+##    avrgETStruct = []`
`+##    for st, etList in aETStruct:`
`+##        shape[2] = len(etList)`
`+##        ma3d = MA.zeros(shape, MA.Float)`
`+##        for idx, et in enumerate(etList):`
`+##            ma3d[:,:,idx] = orng2ma(et)`
`+##        avrgETStruct.append((st, ma2orng(MA.average(ma3d, 2), et.domain)))`
`+##    return avrgETStruct`
`+`
`+`
`+#########################################################################`
`+## ANOVA on genes`
`+#########################################################################`
`+`
`+class AnovaOnGeneResults:`
`+    """A structure to store the results of ANOVA on genes where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).`
`+    """`
`+    def __init__(self, aETStruct, anovaList):`
`+        self.classNames = map(lambda x: x[0], aETStruct)`
`+        self.anovaList = anovaList`
`+`
`+`
`+def anova_on_genes(aETStruct, repMeasuresOnTime=False, callback = None):`
`+    """Conducts ANOVA on genes and returns a tuple (list of [pTime, pStrain, pInter], list of ANOVA objects needed to conduct posthoc tests)`
`+    where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).`
`+    Note: time points that cause empty cells are removed prior to conducting ANOVA.`
`+    """`
`+    ma3d = etStruct2ma3d(aETStruct)`
`+    rgLens = Numeric.array(map(lambda x: len(x[1]), aETStruct))`
`+    # arrays to store p-vals`
`+    FprobTime = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)`
`+    FprobStrain = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)`
`+    FprobTimeStrain = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)`
`+    # store ANOVA objects`
`+    anovaList = []`
`+    # decide between non-repeated / repeated measures ANOVA for factor time`
`+    if repMeasuresOnTime:`
`+        fAnova = AnovaRM12LR`
`+    else:`
`+        fAnova = Anova2wayLR`
`+    # check for empty cells for all genes at once and remove them`
`+    tInd2rem = []`
`+    ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(rgLens)))`
`+    for tIdx in range(ma3d.shape[1]):`
`+        for rIdx in range(rgLens.shape[0]):`
`+            if Numeric.add.reduce(MA.count(ma3d[:,tIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]],1)) == 0:`
`+                tInd2rem.append(tIdx)`
`+                break`
`+    if len(tInd2rem) > 0:`
`+        print "Warning: removing time indices %s for all genes" % (str(tInd2rem))`
`+        tInd2keep = range(ma3d.shape[1])`
`+        for tIdx in tInd2rem:`
`+            tInd2keep.remove(tIdx)`
`+        ma3d = MA.take(ma3d, tInd2keep, 1)`
`+    # for each gene...`
`+    for gIdx in range(ma3d.shape[0]):`
`+        # check for empty cells for that gene`
`+        tInd2rem = []`
`+        for tIdx in range(ma3d.shape[1]):`
`+            for rIdx in range(rgLens.shape[0]):`
`+                if MA.count(ma3d[gIdx,tIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]]) == 0:`
`+                    tInd2rem.append(tIdx)`
`+                    break`
`+        ma2d = ma3d[gIdx]`
`+        if len(tInd2rem) > 0:`
`+            print "Warning: removing time indices %s for gene %i" % (str(tInd2rem), gIdx)`
`+            tInd2keep = range(ma3d.shape[1])`
`+            for tIdx in tInd2rem:`
`+                tInd2keep.remove(tIdx)`
`+            ma2d = MA.take(ma2d, tInd2keep, 0)`
`+        # conduct anova ANOVA`
`+        an = fAnova(ma2d, rgLens, 1)`
`+        if callback:`
`+            callback()`
`+        FprobTime[gIdx] = an.FAprob`
`+        FprobStrain[gIdx] = an.FBprob`
`+        FprobTimeStrain[gIdx] = an.FABprob`
`+        anovaList.append(an)`
`+    return Numeric.transpose(Numeric.array([FprobTime, FprobStrain, FprobTimeStrain])).tolist(), AnovaOnGeneResults(aETStruct, anovaList)`
`+`
`+`
`+#########################################################################`
`+## POSTHOC ANOVA on genes`
`+#########################################################################`
`+`
`+def posthoc_anova_on_genes(className1, className2, aAnovaOnGeneResults, callback=None):`
`+    """Conduct posthoc anova test for the given class names and returns a list of of [pTime, pStrain, pInter].`
`+    """`
`+    cIdx1 = aAnovaOnGeneResults.classNames.index(className1)`
`+    cIdx2 = aAnovaOnGeneResults.classNames.index(className2)`
`+    # arrays to store p-vals`
`+    numGenes = len(aAnovaOnGeneResults.anovaList)`
`+    FpTime = -1*Numeric.ones((numGenes,), Numeric.Float)`
`+    FpStrain = -1*Numeric.ones((numGenes,), Numeric.Float)`
`+    FpTimeStrain = -1*Numeric.ones((numGenes,), Numeric.Float)`
`+    for idx, an in enumerate(aAnovaOnGeneResults.anovaList):`
`+        FpTime[idx], FpStrain[idx], FpTimeStrain[idx] = an.single_posthoc_anova_B_AB(cIdx1, cIdx2)`
`+        if callback: callback()`
`+    return Numeric.transpose(Numeric.array([FpTime, FpStrain, FpTimeStrain])).tolist()`
`+        `
`+`
`+###################################################################################`
`+## HELPER FUNCTIONS`
`+###################################################################################`
`+`
`+def scaleStd(nm, axis=0):`
`+    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""`
`+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)`
`+    return NumExtn.swapaxesMA(nm / NumExtn.stdMA(nm,0), 0, axis)`
`+`
`+def scaleMad(nm, axis=0):`
`+    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""`
`+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)`
`+    return NumExtn.swapaxesMA(nm / NumExtn.madMA(nm,0), 0, axis)`
`+`
`+`
`+def centerMean(nm, axis=0):`
`+    """Returns new masked numarray with values centered by subtracting Median."""`
`+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)`
`+    return NumExtn.swapaxesMA(nm - MA.average(nm,0), 0, axis)`
`+`
`+`
`+def centerMed(nm, axis=0):`
`+    """Returns new masked numarray with values centered by subtracting Median."""`
`+    nm = NumExtn.swapaxesMA(MA.asarray(nm, MA.Float), 0, axis)`
`+    return NumExtn.swapaxesMA(nm - NumExtn.medianMA(nm,0), 0, axis)`
`+`
`+`
`+def getETStruct(path):`
`+    """Returns a list of tuples (strain, [orngET1, orngET2, ...])`
`+    """`
`+    etStruct = []`
`+    strains = os.listdir(path)`
`+    for st in strains:`
`+        pathSt = path + "\\" + st`
`+        replicas = os.listdir(pathSt)`
`+        stEts = []`
`+        for rep in replicas:`
`+            stEts.append(orange.ExampleTable(pathSt + "\\" + rep))`
`+        etStruct.append((st, stEts))`
`+    return etStruct`
`+`
`+`
`+def etStruct2ma3d(aETStruct):`
`+    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D masked array and returns it.`
`+    """`
`+    shape = [0,0,0]`
`+    et0 = aETStruct[0][1][0]                                            # the first example table`
`+    shape[0] = len(et0)                                                 # number of examples (genes)`
`+    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)`
`+    shape[2] = Numeric.add.reduce(map(lambda x: len(x[1]), aETStruct))  # number of ETs (replicas over all strains)`
`+    ma3d = MA.zeros(shape, MA.Float)`
`+    k = 0`
`+    for st, etList in aETStruct:`
`+        for et in etList:`
`+            ma3d[:,:,k] = orng2ma(et)`
`+            k += 1`
`+    return ma3d`
`+`
`+`
`+def orng2ma(aExampleTable):`
`+    """Converts orange.ExampleTable to MA.array based on the attribute values.`
`+    rows correspond to examples, columns correspond to attributes, class values are left out`
`+    missing values and attributes of types other than orange.FloatVariable are masked`
`+    """`
`+    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")`
`+    ma = MA.array(vals, MA.PyObject)`
`+    if aExampleTable.domain.classVar != None:`
`+        ma = ma[:,:-1]`
`+    mask = MA.where(MA.equal(ma, "?"), 1, 0)`
`+    for varIdx, var in enumerate(aExampleTable.domain.attributes):`
`+        if type(var) != orange.FloatVariable:`
`+            mask[:,varIdx] = Numeric.ones(len(aExampleTable))`
`+    return MA.array(MA.array(ma, MA.PyObject, mask=mask).filled(1e20), MA.Float, mask=mask)`
`+`
`+`
`+def ma2orng(arr2d, aDomain):`
`+    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.`
`+    masked values converted to "?" (don't knows)`
`+    arr2d.shape[1] must be equal to the number of the attributes of the given domain`
`+    domain attributes mut be of type orange.FloatVariable`
`+    """`
`+    arr2d = MA.asarray(arr2d, MA.PyObject)`
`+    assert MA.rank(arr2d) == 2, "2d array expected"`
`+    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"`
`+    if aDomain.classVar != None:`
`+        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))`
`+        return orange.ExampleTable(aDomain, et)`
`+    else:`
`+        return orange.ExampleTable(aDomain, arr2d.tolist("?"))`
`+`
`+`
`+def ma2orng_keepClassMetas(arr2d, aExampleTable):`
`+    """Creates new example table where attribute values correspond to the given 2D array, class and meta attributes remain unchanged.`
`+    """`
`+    arr2d = MA.asarray(arr2d, MA.PyObject)`
`+    assert MA.rank(arr2d) == 2, "2D array expected"`
`+    assert arr2d.shape[0] == len(aExampleTable), "arr2d.shape[0] != len(aExampleTable)"`
`+    assert arr2d.shape[1] == len(aExampleTable.domain.attributes), "arr2d.shape[1] != len(aExampleTable.domain.attributes)"`
`+    domAtt = orange.Domain(aExampleTable.domain.attributes, None)`
`+    if aExampleTable.domain.classVar != None:`
`+        domClassMeta = orange.Domain([aExampleTable.domain.classVar])`
`+    else:`
`+        domClassMeta = orange.Domain([])`
`+    domClassMeta.addmetas(aExampleTable.domain.getmetas())`
`+    etAtt = orange.ExampleTable(domAtt, arr2d.tolist("?"))`
`+    etClassMeta = orange.ExampleTable(domClassMeta, aExampleTable)`
`+    return orange.ExampleTable([etAtt, etClassMeta])`
`+`
`+`
`+###################################################################################`
`+## PURE ANOVA BASED ON LINEAR REGRESSION`
`+###################################################################################`
`+`
`+class AnovaLRBase:`
`+    """Base class for all ANOVA types based on multivariate linear regression: manipulation with dummy variables.`
`+    """`
`+`
`+    def getDummyRC(self, numVars, numReplicas=1):`
`+        """Returns 2D array of reference cell encoded dummy variables.`
`+        """`
`+        if not type(numReplicas) == int:`
`+            numReplicas = Numeric.asarray(numReplicas)`
`+            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"`
`+        return Numeric.repeat(Numeric.concatenate((Numeric.zeros((1,numVars-1), Numeric.Float), Numeric.identity(numVars-1, Numeric.Float)), 0), numReplicas, 0)`
`+        `
`+`
`+    def getDummyEE(self, numVars, numReplicas=1):`
`+        """Returns 2D array of effects endoced dummy variables, shape (n, n-1) where n = numVars*numReplicas | sum(numReplicas).`
`+        Each row represents a code for a single variable value, encoded by numVar-1 dummy variables.`
`+        numReplicas = int | list | array of shape (numVars,): the number (or list) of observations per cell.`
`+        """`
`+        assert numVars > 0, "numVars <= 0"`
`+        if type(numReplicas) == int:`
`+            numReplicas = Numeric.ones((numVars,)) * numReplicas`
`+        else:`
`+            numReplicas = Numeric.asarray(numReplicas)`
`+            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"`
`+        if numVars == 1:`
`+            return Numeric.zeros((numReplicas[0], 0))`
`+        else:`
`+            return Numeric.repeat(Numeric.concatenate((Numeric.identity(numVars-1, Numeric.Int), -1*Numeric.ones((1,numVars-1), Numeric.Int)), 0), numReplicas, 0)`
`+        `
`+`
`+    def getSubjectDummyEE(self, numSubjList, numReplicas=1):`
`+        """Returns 2D array of effects endoced subject dummy variables, shape (sum(ni), sum(ni-1)) where numSubjList=[n0,n1,...].`
`+        numSubjList: a list of the number of subjects within each level of the non-repeated factor.`
`+        numReplicas: int | list | array of shape sum(ni): the number of times each subject code (row) is repeated.`
`+        """`
`+        numSubjList = Numeric.asarray(numSubjList)`
`+        assert len(numSubjList.shape) == 1, "len(numSubjList.shape) != 1"`
`+        if type(numReplicas) == int:`
`+            numReplicas = Numeric.ones((numSubjList.shape[0],)) * numReplicas`
`+        else:`
`+            numReplicas = Numeric.asarray(numReplicas)`
`+            assert len(numReplicas.shape) == 1, "len(numReplicas.shape) != 1"`
`+            assert numReplicas.shape[0] == numSubjList.shape[0], "numReplicas: int or a list of the same length as numSubjList expected"`
`+        si0 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList)))      # indices for axis 0 of dm`
`+        si1 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList-1)))    # indices for axis 1 of dm`
`+        dm = Numeric.zeros((si0[-1], si1[-1]))                                     # subject dummy codes matrix`
`+        numRepeats = Numeric.ones((si0[-1],))`
`+        for i in range(numSubjList.shape[0]):`
`+            if numSubjList[i] == 1: print "Warning: a single subject in group %i" % (i)`
`+            dm[si0[i]:si0[i+1], si1[i]:si1[i+1]] = self.getDummyEE(numSubjList[i],1)`
`+            numRepeats[si0[i]:si0[i+1]] = Numeric.ones((numSubjList[i],)) * numReplicas[i]`
`+        return Numeric.repeat(dm, numRepeats, 0)`
`+`
`+`
`+    def getDummiesJoin(self, dummyMtrx1, dummyMtrx2):`
`+        """Returns a tuple of resized dummy matrices containing the cartesian product of rows.`
`+        """`
`+        dummyMtrx1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[0], 0)                         # repeat each row separately`
`+        dummyMtrx2 = Numeric.resize(dummyMtrx2, (dummyMtrx1.shape[0], dummyMtrx2.shape[1]))     # repeat whole matrix, add new rows`
`+        return (dummyMtrx1, dummyMtrx2)`
`+`
`+`
`+    def getDummyInteraction(self, dummyMtrx1, dummyMtrx2):`
`+        """Returns a product of dummy matrices resized to the cartesian product of columns.`
`+        """`
`+        dmIntShp = (dummyMtrx1.shape[0], dummyMtrx1.shape[1] * dummyMtrx2.shape[1])`
`+        dmInt1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[1], 1)                             # repeat each column separately`
`+        dmInt2 = Numeric.transpose(Numeric.resize(Numeric.transpose(dummyMtrx2, (1,0)), (dmIntShp[1], dmIntShp[0])), (1,0)) # repeat whole matrix, add new columns`
`+        return dmInt1*dmInt2`
`+`
`+`
`+class Anova2wayLRBase(AnovaLRBase):`
`+    """Base class for all 2 way ANOVA types (repeated and non-repeated measures) includes various post-hoc tests.`
`+    The following variables should be defined by base classes:`
`+        self._addInteraction = addInteraction`
`+        self._arr2d = arr2d`
`+        self._groupLens = groupLens`
`+        self._MSpool = LR_treat.MSres`
`+        self._DFpool = LR_treat.MSreg`
`+    """`
`+`
`+    def posthoc_tstat_B(self):`
`+        """Fisher LSD: Compares all levels of factor B and returns (b,b) matrix of t-stat values and a matrix of corresponding p-values.`
`+        Use when there is no significant interaction.`
`+        Warning: Statistica computes it WRONG!`
`+        """`
`+        b = self._groupLens.shape[0]`
`+        # precompute averages`
`+        x_avrg = Numeric.zeros((b,), Numeric.Float)         # mean of cell means over factor A`
`+        sum_count_x = Numeric.zeros((b,), Numeric.Float)`
`+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())`
`+        for idx in range(b):`
`+            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])`
`+            xi = MA.take(self._arr2d, groupInd, 1)`
`+            x_avrg[idx] = MA.average(MA.average(xi,1))                  # first average over replicas to obtain cell mean, then over factor A`
`+            sum_count_x[idx] = Numeric.add.reduce(1./MA.count(xi,1))    # first count the number of measurements within cells, then sum inverses over factor A`
`+            ##x_avrg[idx] = MA.average(MA.ravel(xi))                    # this is how Statistica computes it, but it is WRONG!`
`+            ##sum_count_x[idx] = 1./MA.count(xi)                        # this is how Statistica computes it, but it is WRONG!`
`+        # t-statistics`
`+        x_avrg_2 = MA.resize(x_avrg, (x_avrg.shape[0], x_avrg.shape[0]))`
`+        sum_count_x_2 = Numeric.resize(sum_count_x, (sum_count_x.shape[0],sum_count_x.shape[0]))`
`+        tstat = (MA.transpose(x_avrg_2) - x_avrg_2) * self._arr2d.shape[0] / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))`
`+        ##tstat = (MA.transpose(x_avrg_2) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))     # this is how Statistica computes it, but it is WRONG!`
`+        tprob = NumExtn.triangularPut(stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + NumExtn.triangularGet(tstat**2))),1,1)`
`+        return tstat, tprob`
`+`
`+`
`+    def isSignif_holm_B(self, alpha):`
`+        """Conduct Holm step-down sequential multiple comparison on factor B.`
`+        Returns (b,b) matrix indicating significant differences between levels of factor B and the direction of changes [-1|0|1].`
`+        """`
`+        tstat, pvals = self.posthoc_tstat_B()`
`+        pvals = NumExtn.triangularGet(pvals)`
`+        sortInd = Numeric.argsort(pvals)`
`+        k = pvals.shape[0]`
`+        isSignif = -1*Numeric.ones((k,), Numeric.Int)`
`+        for j in range(k):`
`+            isSignif[sortInd[j]] = pvals[sortInd[j]] < float(alpha) / (k-j)`
`+        return NumExtn.triangularPut(isSignif, upper=1, lower=1) * (MA.greater(tstat,0) - MA.less(tstat,0))`
`+`
`+`
`+    def posthoc_tstat_BbyA(self):`
`+        """Fisher LSD: Compares all levels of factor B by each level of factor A separately.`
`+        Returns: (b,b,a) matrix of t-stat values`
`+                 (b,b,a) matrix of corresponding p-values`
`+                 (b,b) matrix of geometric means of p-values over all levels of factor A.`
`+        Use when there is a significant interaction and factor A is of no interest.`
`+        """`
`+        a = self._arr2d.shape[0]`
`+        b = self._groupLens.shape[0]`
`+        # precompute averages`
`+        x_avrg = Numeric.zeros((a,b), Numeric.Float)`
`+        sum_count_x = Numeric.zeros((a,b), Numeric.Float)`
`+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())`
`+        for idx in range(b):`
`+            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])`
`+            xi = MA.take(self._arr2d, groupInd, 1)`
`+            x_avrg[:,idx] = MA.average(xi, 1)`
`+            sum_count_x[:,idx] = 1. / MA.count(xi, 1)`
`+        # t-statistics`
`+        x_avrg_2 = MA.resize(x_avrg, (b,a,b))`
`+        sum_count_x_2 = Numeric.resize(sum_count_x, (b,a,b))`
`+        tstat = (MA.transpose(x_avrg_2, (2,1,0)) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2, (2,1,0)) + sum_count_x_2))`
`+        # get p-values for each level of factor a separately (axis 1)`
`+        tprob = MA.array(-1*Numeric.ones(tstat.shape, Numeric.Float), mask=Numeric.ones(tstat.shape))`
`+        for idx1 in range(tprob.shape[1]):`
`+            tprob[:,idx1,:] = NumExtn.triangularPut(stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + NumExtn.triangularGet(tstat[:,idx1,:]**2))),1,1)`
`+        return tstat, tprob`
`+`
`+`
`+    def posthoc_anova_B_AB(self):`
`+        """Conduct ANOVA for each pair of levels of factor B to detect interaction effect.`
`+        Returns for each pair of factor B levels:`
`+            (b,b) matrix of F-stat values for effect of factor B `
`+            (b,b) matrix of p-values for effect of factor B`
`+            (b,b) matrix of F-stat values for interaction effect`
`+            (b,b) matrix of p-values for interaction effect`
`+        """`
`+        if self._addInteraction != 1:`
`+            raise "Error: posthoc_anova_B_AB can be conducted only when the interaction effect has been tested"`
`+        b = self._groupLens.shape[0]`
`+        FB = MA.masked * MA.ones((b,b), MA.Float)`
`+        FBprob = MA.masked * MA.ones((b,b), MA.Float)`
`+        FAB = MA.masked * MA.ones((b,b), MA.Float)`
`+        FABprob = MA.masked * MA.ones((b,b), MA.Float)`
`+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())`
`+        groupInd = map(lambda i,j: range(i,j), groupLensAcc0[:-1],groupLensAcc0[1:])`
`+        for i in range(b):`
`+            for j in range(i+1,b):`
`+                takeInd = groupInd[i] + groupInd[j]`
`+                an = self.__class__(MA.take(self._arr2d, takeInd, 1), [self._groupLens[i],self._groupLens[j]], addInteraction=1)`
`+                FB[i,j] = FB[j,i] = an.FB`
`+                FBprob[i,j] = FBprob[j,i] = an.FBprob`
`+                FAB[i,j] = FAB[j,i] = an.FAB`
`+                FABprob[i,j] = FABprob[j,i] = an.FABprob`
`+        return FB, FBprob, FAB, FABprob`
`+`
`+`
`+    def single_posthoc_anova_B_AB(self, bLevel1, bLevel2):`
`+        """Conduct ANOVA for a specific pair of factor B levels and returns (FprobA, FprobB, FprobAB)`
`+        """`
`+        if self._addInteraction != 1:`
`+            raise "Error: single_posthoc_anova_B_AB can be conducted only when the interaction effect has been tested"`
`+        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())`
`+        groupInd = map(lambda i,j: range(i,j), groupLensAcc0[:-1],groupLensAcc0[1:])`
`+        takeInd = groupInd[bLevel1] + groupInd[bLevel2]`
`+        an = self.__class__(MA.take(self._arr2d, takeInd, 1), [self._groupLens[bLevel1],self._groupLens[bLevel2]], addInteraction=1)`
`+        return an.FAprob, an.FBprob, an.FABprob`
`+`
`+    `
`+class Anova2wayLR(Anova2wayLRBase):`
`+    """2 way ANOVA with multiple observations per cell for unbalanced designs using multivariate linear regression.`
`+    Multiple observations given at axis 1 together with the list of group lenghts, e.g. [3,2].`
`+    Supports balanced and unbalanced designs, i.e. different number of observations per cell and missing (masked) data.`
`+    TODO: account for empty cells (this implementation results in singular design matrix)`
`+    """`
`+`
`+    def __init__(self, arr2d, groupLens, addInteraction=0):`
`+        """arr2d: 2D masked array where replicas are given at axis 1 together with lenghts of the groups (multiple observations per cell).`
`+        addInteraction: include / exclude the interaction between factors`
`+        """`
`+        arr2d = MA.asarray(arr2d)`
`+        groupLens = Numeric.array(groupLens)    # make a copy for the case if subjects or levels of factor B are removed`
`+        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"`
`+`
`+        # check if there exist factor-levels with all values missing (for A and B)`
`+        # if so, reduce the corresponding DF and fix the number of dummy variables`
`+        missIndA = Numeric.compress(MA.count(arr2d, 1) == 0, Numeric.arange(arr2d.shape[0]))`
`+        if missIndA.shape[0] > 0:`
`+            print "Warning: removig factor A level(s) %s" % str(missIndA.tolist())`
`+            takeIndA = Numeric.compress(MA.count(arr2d, 1) != 0, Numeric.arange(arr2d.shape[0]))`
`+            arr2d = MA.take(arr2d, takeIndA, 0)`
`+        missIndSubj = Numeric.compress(MA.count(arr2d, 0) == 0, Numeric.arange(arr2d.shape[1]))`
`+        if missIndSubj.shape[0] > 0:`
`+            takeIndSubj = Numeric.compress(MA.count(arr2d, 0) != 0, Numeric.arange(arr2d.shape[1]))`
`+            arr2d = MA.take(arr2d, takeIndSubj, 1)`
`+            # fix groupLens`
`+            mapSubj2Group = -1*Numeric.ones(arr2d.shape[1])`
`+            groupLensAcc0 = [0] + Numeric.add.accumulate(groupLens).tolist()`
`+            for i in range(len(groupLensAcc0) - 1):`
`+                mapSubj2Group[groupLensAcc0[i]:groupLensAcc0[i+1]] = i`
`+            for subjIdx in missIndSubj:`
`+                groupLens[mapSubj2Group[subjIdx]] -= 1`
`+        # fix number of factor B levels`
`+        missIndB = Numeric.compress(groupLens <= 0, Numeric.arange(groupLens.shape[0]))`
`+        if missIndB.shape[0] > 0:`
`+            print "Warning: removig factor B level(s) %s" % str(missIndB.tolist())`
`+            takeIndB = Numeric.compress(groupLens > 0, Numeric.arange(groupLens.shape[0]))`
`+            groupLens = Numeric.take(groupLens, takeIndB)`
`+            # arr2d has already been taken care of by removing the subjects without observations`
`+`
`+        # check if data is balanced`
`+        isBalanced = (1. * Numeric.add.reduce((1.*groupLens/groupLens[0]) == Numeric.ones(groupLens.shape[0])) / groupLens.shape[0]) == 1`
`+`
`+        # check for empty cells, raise exception if empty cells exist and addInteraction=1`
`+        if addInteraction:`
`+            ax1Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))`
`+            for idx in range(groupLens.shape[0]):`
`+                if Numeric.add.reduce(MA.count(arr2d[:,ax1Ind[idx]:ax1Ind[idx+1]],1) == 0) > 0:`
`+                    raise ValueError, "arr2d has empty cells, cannot evaluate the interaction between factors, try addInteraction=0"`
`+`
`+        # remove missing observations and the corresponding dummy variable codes`
`+        self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...]`
`+        noMissing = MA.count(self.y) == self.y.shape[0]`
`+        self.y, takeInd = NumExtn.compressIndices(self.y)`
`+`
`+        # dummy variables`
`+        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)`
`+        self.dummyB = self.getDummyEE(len(groupLens), groupLens)`
`+        self.dummyA, self.dummyB = self.getDummiesJoin(self.dummyA, self.dummyB)`
`+        if addInteraction:`
`+            self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB)`