Commits

mitar committed 0d6de79

Moving files around.

Comments (0)

Files changed (6)

Orange/bioinformatics/pstat.py

+# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
+#
+# This software is distributable under the terms of the GNU
+# General Public License (GPL) v2, the text of which can be found at
+# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
+# using this module constitutes acceptance of the terms of this License.
+#
+# Disclaimer
+# 
+# This software is provided "as-is".  There are no expressed or implied
+# warranties of any kind, including, but not limited to, the warranties
+# of merchantability and fittness for a given application.  In no event
+# shall Gary Strangman be liable for any direct, indirect, incidental,
+# special, exemplary or consequential damages (including, but not limited
+# to, loss of use, data or profits, or business interruption) however
+# caused and on any theory of liability, whether in contract, strict
+# liability or tort (including negligence or otherwise) arising in any way
+# out of the use of this software, even if advised of the possibility of
+# such damage.
+#
+# Comments and/or additions are welcome (send e-mail to:
+# strang@nmr.mgh.harvard.edu).
+# 
+"""
+pstat.py module
+
+#################################################
+#######  Written by:  Gary Strangman  ###########
+#######  Last modified:  Jun 29, 2001 ###########
+#################################################
+
+This module provides some useful list and array manipulation routines
+modeled after those found in the |Stat package by Gary Perlman, plus a
+number of other useful list/file manipulation functions.  The list-based
+functions include:
+
+      abut (source,*args)
+      simpleabut (source, addon)
+      colex (listoflists,cnums)
+      collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+      dm (listoflists,criterion)
+      flat (l)
+      linexand (listoflists,columnlist,valuelist)
+      linexor (listoflists,columnlist,valuelist)
+      linedelimited (inlist,delimiter)
+      lineincols (inlist,colsize) 
+      lineincustcols (inlist,colsizes)
+      list2string (inlist)
+      makelol(inlist)
+      makestr(x)
+      printcc (lst,extra=2)
+      printincols (listoflists,colsize)
+      pl (listoflists)
+      printl(listoflists)
+      replace (lst,oldval,newval)
+      recode (inlist,listmap,cols='all')
+      remap (listoflists,criterion)
+      roundlist (inlist,num_digits_to_round_floats_to)
+      sortby(listoflists,sortcols)
+      unique (inlist)
+      duplicates(inlist)
+      writedelimited (listoflists, delimiter, file, writetype='w')
+
+Some of these functions have alternate versions which are defined only if
+Numeric (NumPy) can be imported.  These functions are generally named as
+above, with an 'a' prefix.
+
+      aabut (source, *args)
+      acolex (a,indices,axis=1)
+      acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
+      adm (a,criterion)
+      alinexand (a,columnlist,valuelist)
+      alinexor (a,columnlist,valuelist)
+      areplace (a,oldval,newval)
+      arecode (a,listmap,col='all')
+      arowcompare (row1, row2)
+      arowsame (row1, row2)
+      asortrows(a,axis=0)
+      aunique(inarray)
+      aduplicates(inarray)
+
+Currently, the code is all but completely un-optimized.  In many cases, the
+array versions of functions amount simply to aliases to built-in array
+functions/methods.  Their inclusion here is for function name consistency.
+"""
+
+## CHANGE LOG:
+## ==========
+## 01-11-15 ... changed list2string() to accept a delimiter
+## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
+## 01-05-31 ... added duplicates() and aduplicates() functions
+## 00-12-28 ... license made GPL, docstring and import requirements
+## 99-11-01 ... changed version to 0.3
+## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
+## 03/27/99 ... added areplace function, made replace fcn recursive
+## 12/31/98 ... added writefc function for ouput to fixed column sizes
+## 12/07/98 ... fixed import problem (failed on collapse() fcn)
+##              added __version__ variable (now 0.2)
+## 12/05/98 ... updated doc-strings
+##              added features to collapse() function
+##              added flat() function for lists
+##              fixed a broken asortrows() 
+## 11/16/98 ... fixed minor bug in aput for 1D arrays
+##
+## 11/08/98 ... fixed aput to output large arrays correctly
+
+import stats  # required 3rd party module
+import string, copy
+from types import *
+
+__version__ = 0.4
+
+###===========================  LIST FUNCTIONS  ==========================
+###
+### Here are the list functions, DEFINED FOR ALL SYSTEMS.
+### Array functions (for NumPy-enabled computers) appear below.
+###
+
+def abut (source,*args):
+    """
+Like the |Stat abut command.  It concatenates two lists side-by-side
+and returns the result.  '2D' lists are also accomodated for either argument
+(source or addon).  CAUTION:  If one list is shorter, it will be repeated
+until it is as long as the longest list.  If this behavior is not desired,
+use pstat.simpleabut().
+
+Usage:   abut(source, args)   where args=any # of lists
+Returns: a list of lists as long as the LONGEST list past, source on the
+         'left', lists in <args> attached consecutively on the 'right'
+"""
+
+    if type(source) not in [ListType,TupleType]:
+        source = [source]
+    for addon in args:
+        if type(addon) not in [ListType,TupleType]:
+            addon = [addon]
+        if len(addon) < len(source):                # is source list longer?
+            if len(source) % len(addon) == 0:        # are they integer multiples?
+                repeats = len(source)/len(addon)    # repeat addon n times
+                origadd = copy.deepcopy(addon)
+                for i in range(repeats-1):
+                    addon = addon + origadd
+            else:
+                repeats = len(source)/len(addon)+1  # repeat addon x times,
+                origadd = copy.deepcopy(addon)      #    x is NOT an integer
+                for i in range(repeats-1):
+                    addon = addon + origadd
+                    addon = addon[0:len(source)]
+        elif len(source) < len(addon):                # is addon list longer?
+            if len(addon) % len(source) == 0:        # are they integer multiples?
+                repeats = len(addon)/len(source)    # repeat source n times
+                origsour = copy.deepcopy(source)
+                for i in range(repeats-1):
+                    source = source + origsour
+            else:
+                repeats = len(addon)/len(source)+1  # repeat source x times,
+                origsour = copy.deepcopy(source)    #   x is NOT an integer
+                for i in range(repeats-1):
+                    source = source + origsour
+                source = source[0:len(addon)]
+
+        source = simpleabut(source,addon)
+    return source
+
+
+def simpleabut (source, addon):
+    """
+Concatenates two lists as columns and returns the result.  '2D' lists
+are also accomodated for either argument (source or addon).  This DOES NOT
+repeat either list to make the 2 lists of equal length.  Beware of list pairs
+with different lengths ... the resulting list will be the length of the
+FIRST list passed.
+
+Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
+Returns: a list of lists as long as source, with source on the 'left' and
+                 addon on the 'right'
+"""
+    if type(source) not in [ListType,TupleType]:
+        source = [source]
+    if type(addon) not in [ListType,TupleType]:
+        addon = [addon]
+    minlen = min(len(source),len(addon))
+    list = copy.deepcopy(source)                # start abut process
+    if type(source[0]) not in [ListType,TupleType]:
+        if type(addon[0]) not in [ListType,TupleType]:
+            for i in range(minlen):
+                list[i] = [source[i]] + [addon[i]]        # source/addon = column
+        else:
+            for i in range(minlen):
+                list[i] = [source[i]] + addon[i]        # addon=list-of-lists
+    else:
+        if type(addon[0]) not in [ListType,TupleType]:
+            for i in range(minlen):
+                list[i] = source[i] + [addon[i]]        # source=list-of-lists
+        else:
+            for i in range(minlen):
+                list[i] = source[i] + addon[i]        # source/addon = list-of-lists
+    source = list
+    return source
+
+
+def colex (listoflists,cnums):
+    """
+Extracts from listoflists the columns specified in the list 'cnums'
+(cnums can be an integer, a sequence of integers, or a string-expression that
+corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
+columns 3 onward from the listoflists).
+
+Usage:   colex (listoflists,cnums)
+Returns: a list-of-lists corresponding to the columns from listoflists
+         specified by cnums, in the order the column numbers appear in cnums
+"""
+    global index
+    column = 0
+    if type(cnums) in [ListType,TupleType]:   # if multiple columns to get
+        index = cnums[0]
+        column = map(lambda x: x[index], listoflists)
+        for col in cnums[1:]:
+            index = col
+            column = abut(column,map(lambda x: x[index], listoflists))
+    elif type(cnums) == StringType:              # if an 'x[3:]' type expr.
+        evalstring = 'map(lambda x: x'+cnums+', listoflists)'
+        column = eval(evalstring)
+    else:                                     # else it's just 1 col to get
+        index = cnums
+        column = map(lambda x: x[index], listoflists)
+    return column
+
+
+def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
+     """
+Averages data in collapsecol, keeping all unique items in keepcols
+(using unique, which keeps unique LISTS of column numbers), retaining the
+unique sets of values in keepcols, the mean for each.  Setting fcn1
+and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
+will append those results (e.g., the sterr, N) after each calculated mean.
+cfcn is the collapse function to apply (defaults to mean, defined here in the
+pstat module to avoid circular imports with stats.py, but harmonicmean or
+others could be passed).
+
+Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+Returns: a list of lists with all unique permutations of entries appearing in
+     columns ("conditions") specified by keepcols, abutted with the result of
+     cfcn (if cfcn=None, defaults to the mean) of each column specified by
+     collapsecols.
+"""
+     def collmean (inlist):
+         s = 0
+         for item in inlist:
+             s = s + item
+         return s/float(len(inlist))
+
+     if type(keepcols) not in [ListType,TupleType]:
+         keepcols = [keepcols]
+     if type(collapsecols) not in [ListType,TupleType]:
+         collapsecols = [collapsecols]
+     if cfcn == None:
+         cfcn = collmean
+     if keepcols == []:
+         means = [0]*len(collapsecols)
+         for i in range(len(collapsecols)):
+             avgcol = colex(listoflists,collapsecols[i])
+             means[i] = cfcn(avgcol)
+             if fcn1:
+                 try:
+                     test = fcn1(avgcol)
+                 except:
+                     test = 'N/A'
+                     means[i] = [means[i], test]
+             if fcn2:
+                 try:
+                     test = fcn2(avgcol)
+                 except:
+                     test = 'N/A'
+                 try:
+                     means[i] = means[i] + [len(avgcol)]
+                 except TypeError:
+                     means[i] = [means[i],len(avgcol)]
+         return means
+     else:
+         values = colex(listoflists,keepcols)
+         uniques = unique(values)
+         uniques.sort()
+         newlist = []
+         if type(keepcols) not in [ListType,TupleType]:  keepcols = [keepcols]
+         for item in uniques:
+             if type(item) not in [ListType,TupleType]:  item =[item]
+             tmprows = linexand(listoflists,keepcols,item)
+             for col in collapsecols:
+                 avgcol = colex(tmprows,col)
+                 item.append(cfcn(avgcol))
+                 if fcn1 <> None:
+                     try:
+                         test = fcn1(avgcol)
+                     except:
+                         test = 'N/A'
+                     item.append(test)
+                 if fcn2 <> None:
+                     try:
+                         test = fcn2(avgcol)
+                     except:
+                         test = 'N/A'
+                     item.append(test)
+                 newlist.append(item)
+         return newlist
+
+
+def dm (listoflists,criterion):
+    """
+Returns rows from the passed list of lists that meet the criteria in
+the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
+will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
+with column 2 equal to the string 'N').
+
+Usage:   dm (listoflists, criterion)
+Returns: rows from listoflists that meet the specified criterion.
+"""
+    function = 'filter(lambda x: '+criterion+',listoflists)'
+    lines = eval(function)
+    return lines
+
+
+def flat(l):
+    """
+Returns the flattened version of a '2D' list.  List-correlate to the a.flat()
+method of NumPy arrays.
+
+Usage:    flat(l)
+"""
+    newl = []
+    for i in range(len(l)):
+        for j in range(len(l[i])):
+            newl.append(l[i][j])
+    return newl
+
+
+def linexand (listoflists,columnlist,valuelist):
+    """
+Returns the rows of a list of lists where col (from columnlist) = val
+(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
+len(columnlist) must equal len(valuelist).
+
+Usage:   linexand (listoflists,columnlist,valuelist)
+Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
+"""
+    if type(columnlist) not in [ListType,TupleType]:
+        columnlist = [columnlist]
+    if type(valuelist) not in [ListType,TupleType]:
+        valuelist = [valuelist]
+    criterion = ''
+    for i in range(len(columnlist)):
+        if type(valuelist[i])==StringType:
+            critval = '\'' + valuelist[i] + '\''
+        else:
+            critval = str(valuelist[i])
+        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
+    criterion = criterion[0:-3]         # remove the "and" after the last crit
+    function = 'filter(lambda x: '+criterion+',listoflists)'
+    lines = eval(function)
+    return lines
+
+
+def linexor (listoflists,columnlist,valuelist):
+    """
+Returns the rows of a list of lists where col (from columnlist) = val
+(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
+One value is required for each column in columnlist.  If only one value
+exists for columnlist but multiple values appear in valuelist, the
+valuelist values are all assumed to pertain to the same column.
+
+Usage:   linexor (listoflists,columnlist,valuelist)
+Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
+"""
+    if type(columnlist) not in [ListType,TupleType]:
+        columnlist = [columnlist]
+    if type(valuelist) not in [ListType,TupleType]:
+        valuelist = [valuelist]
+    criterion = ''
+    if len(columnlist) == 1 and len(valuelist) > 1:
+        columnlist = columnlist*len(valuelist)
+    for i in range(len(columnlist)):          # build an exec string
+        if type(valuelist[i])==StringType:
+            critval = '\'' + valuelist[i] + '\''
+        else:
+            critval = str(valuelist[i])
+        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
+    criterion = criterion[0:-2]         # remove the "or" after the last crit
+    function = 'filter(lambda x: '+criterion+',listoflists)'
+    lines = eval(function)
+    return lines
+
+
+def linedelimited (inlist,delimiter):
+    """
+Returns a string composed of elements in inlist, with each element
+separated by 'delimiter.'  Used by function writedelimited.  Use '\t'
+for tab-delimiting.
+
+Usage:   linedelimited (inlist,delimiter)
+"""
+    outstr = ''
+    for item in inlist:
+        if type(item) <> StringType:
+            item = str(item)
+        outstr = outstr + item + delimiter
+    outstr = outstr[0:-1]
+    return outstr
+
+
+def lineincols (inlist,colsize):
+    """
+Returns a string composed of elements in inlist, with each element
+right-aligned in columns of (fixed) colsize.
+
+Usage:   lineincols (inlist,colsize)   where colsize is an integer
+"""
+    outstr = ''
+    for item in inlist:
+        if type(item) <> StringType:
+            item = str(item)
+        size = len(item)
+        if size <= colsize:
+            for i in range(colsize-size):
+                outstr = outstr + ' '
+            outstr = outstr + item
+        else:
+            outstr = outstr + item[0:colsize+1]
+    return outstr
+
+
+def lineincustcols (inlist,colsizes):
+    """
+Returns a string composed of elements in inlist, with each element
+right-aligned in a column of width specified by a sequence colsizes.  The
+length of colsizes must be greater than or equal to the number of columns
+in inlist.
+
+Usage:   lineincustcols (inlist,colsizes)
+Returns: formatted string created from inlist
+"""
+    outstr = ''
+    for i in range(len(inlist)):
+        if type(inlist[i]) <> StringType:
+            item = str(inlist[i])
+        else:
+            item = inlist[i]
+        size = len(item)
+        if size <= colsizes[i]:
+            for j in range(colsizes[i]-size):
+                outstr = outstr + ' '
+            outstr = outstr + item
+        else:
+            outstr = outstr + item[0:colsizes[i]+1]
+    return outstr
+
+
+def list2string (inlist,delimit=' '):
+    """
+Converts a 1D list to a single long string for file output, using
+the string.join function.
+
+Usage:   list2string (inlist,delimit=' ')
+Returns: the string created from inlist
+"""
+    stringlist = map(makestr,inlist)
+    return string.join(stringlist,delimit)
+
+
+def makelol(inlist):
+    """
+Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
+want to use put() to write a 1D list one item per line in the file.
+
+Usage:   makelol(inlist)
+Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
+"""
+    x = []
+    for item in inlist:
+        x.append([item])
+    return x
+
+
+def makestr (x):
+    if type(x) <> StringType:
+        x = str(x)
+    return x
+
+
+def printcc (lst,extra=2):
+    """
+Prints a list of lists in columns, customized by the max size of items
+within the columns (max size of items in col, plus 'extra' number of spaces).
+Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
+respectively.
+
+Usage:   printcc (lst,extra=2)
+Returns: None
+"""
+    if type(lst[0]) not in [ListType,TupleType]:
+        lst = [lst]
+    rowstokill = []
+    list2print = copy.deepcopy(lst)
+    for i in range(len(lst)):
+        if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
+            rowstokill = rowstokill + [i]
+    rowstokill.reverse()   # delete blank rows from the end
+    for row in rowstokill:
+        del list2print[row]
+    maxsize = [0]*len(list2print[0])
+    for col in range(len(list2print[0])):
+        items = colex(list2print,col)
+        items = map(makestr,items)
+        maxsize[col] = max(map(len,items)) + extra
+    for row in lst:
+        if row == ['\n'] or row == '\n' or row == '' or row == ['']:
+            print
+        elif row == ['dashes'] or row == 'dashes':
+            dashes = [0]*len(maxsize)
+            for j in range(len(maxsize)):
+                dashes[j] = '-'*(maxsize[j]-2)
+            print lineincustcols(dashes,maxsize)
+        else:
+            print lineincustcols(row,maxsize)
+    return None
+
+
+def printincols (listoflists,colsize):
+    """
+Prints a list of lists in columns of (fixed) colsize width, where
+colsize is an integer.
+
+Usage:   printincols (listoflists,colsize)
+Returns: None
+"""
+    for row in listoflists:
+        print lineincols(row,colsize)
+    return None
+
+
+def pl (listoflists):
+    """
+Prints a list of lists, 1 list (row) at a time.
+
+Usage:   pl(listoflists)
+Returns: None
+"""
+    for row in listoflists:
+        if row[-1] == '\n':
+            print row,
+        else:
+            print row
+    return None
+
+
+def printl(listoflists):
+    """Alias for pl."""
+    pl(listoflists)
+    return
+
+
+def replace (inlst,oldval,newval):
+    """
+Replaces all occurrences of 'oldval' with 'newval', recursively.
+
+Usage:   replace (inlst,oldval,newval)
+"""
+    lst = inlst*1
+    for i in range(len(lst)):
+        if type(lst[i]) not in [ListType,TupleType]:
+            if lst[i]==oldval: lst[i]=newval
+        else:
+            lst[i] = replace(lst[i],oldval,newval)
+    return lst
+
+
+def recode (inlist,listmap,cols=None):
+    """
+Changes the values in a list to a new set of values (useful when
+you need to recode data from (e.g.) strings to numbers.  cols defaults
+to None (meaning all columns are recoded).
+
+Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
+Returns: inlist with the appropriate values replaced with new ones
+"""
+    lst = copy.deepcopy(inlist)
+    if cols != None:
+        if type(cols) not in [ListType,TupleType]:
+            cols = [cols]
+        for col in cols:
+            for row in range(len(lst)):
+                try:
+                    idx = colex(listmap,0).index(lst[row][col])
+                    lst[row][col] = listmap[idx][1]
+                except ValueError:
+                    pass
+    else:
+        for row in range(len(lst)):
+            for col in range(len(lst)):
+                try:
+                    idx = colex(listmap,0).index(lst[row][col])
+                    lst[row][col] = listmap[idx][1]
+                except ValueError:
+                    pass
+    return lst
+
+
+def remap (listoflists,criterion):
+    """
+Remaps values in a given column of a 2D list (listoflists).  This requires
+a criterion as a function of 'x' so that the result of the following is
+returned ... map(lambda x: 'criterion',listoflists).  
+
+Usage:   remap(listoflists,criterion)    criterion=string
+Returns: remapped version of listoflists
+"""
+    function = 'map(lambda x: '+criterion+',listoflists)'
+    lines = eval(function)
+    return lines
+
+
+def roundlist (inlist,digits):
+    """
+Goes through each element in a 1D or 2D inlist, and applies the following
+function to all elements of FloatType ... round(element,digits).
+
+Usage:   roundlist(inlist,digits)
+Returns: list with rounded floats
+"""
+    if type(inlist[0]) in [IntType, FloatType]:
+        inlist = [inlist]
+    l = inlist*1
+    for i in range(len(l)):
+        for j in range(len(l[i])):
+            if type(l[i][j])==FloatType:
+                l[i][j] = round(l[i][j],digits)
+    return l
+
+
+def sortby(listoflists,sortcols):
+    """
+Sorts a list of lists on the column(s) specified in the sequence
+sortcols.
+
+Usage:   sortby(listoflists,sortcols)
+Returns: sorted list, unchanged column ordering
+"""
+    newlist = abut(colex(listoflists,sortcols),listoflists)
+    newlist.sort()
+    try:
+        numcols = len(sortcols)
+    except TypeError:
+        numcols = 1
+    crit = '[' + str(numcols) + ':]'
+    newlist = colex(newlist,crit)
+    return newlist
+
+
+def unique (inlist):
+    """
+Returns all unique items in the passed list.  If the a list-of-lists
+is passed, unique LISTS are found (i.e., items in the first dimension are
+compared).
+
+Usage:   unique (inlist)
+Returns: the unique elements (or rows) in inlist
+"""
+    uniques = []
+    for item in inlist:
+        if item not in uniques:
+            uniques.append(item)
+    return uniques
+
+def duplicates(inlist):
+    """
+Returns duplicate items in the FIRST dimension of the passed list.
+
+Usage:   duplicates (inlist)
+"""
+    dups = []
+    for i in range(len(inlist)):
+        if inlist[i] in inlist[i+1:]:
+            dups.append(inlist[i])
+    return dups
+
+
+def nonrepeats(inlist):
+    """
+Returns items that are NOT duplicated in the first dim of the passed list.
+
+Usage:   nonrepeats (inlist)
+"""
+    nonrepeats = []
+    for i in range(len(inlist)):
+        if inlist.count(inlist[i]) == 1:
+            nonrepeats.append(inlist[i])
+    return nonrepeats
+
+
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+#===================   PSTAT ARRAY FUNCTIONS  =====================
+
+try:                         # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
+ import Numeric
+ N = Numeric
+
+ def aabut (source, *args):
+    """
+Like the |Stat abut command.  It concatenates two arrays column-wise
+and returns the result.  CAUTION:  If one array is shorter, it will be
+repeated until it is as long as the other.
+
+Usage:   aabut (source, args)    where args=any # of arrays
+Returns: an array as long as the LONGEST array past, source appearing on the
+         'left', arrays in <args> attached on the 'right'.
+"""
+    if len(source.shape)==1:
+        width = 1
+        source = N.resize(source,[source.shape[0],width])
+    else:
+        width = source.shape[1]
+    for addon in args:
+        if len(addon.shape)==1:
+            width = 1
+            addon = N.resize(addon,[source.shape[0],width])
+        else:
+            width = source.shape[1]
+        if len(addon) < len(source):
+            addon = N.resize(addon,[source.shape[0],addon.shape[1]])
+        elif len(source) < len(addon):
+            source = N.resize(source,[addon.shape[0],source.shape[1]])
+        source = N.concatenate((source,addon),1)
+    return source
+
+
+ def acolex (a,indices,axis=1):
+    """
+Extracts specified indices (a list) from passed array, along passed
+axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
+column-array (and that the whole array will be returned as a column).
+
+Usage:   acolex (a,indices,axis=1)
+Returns: the columns of a specified by indices
+"""
+    if type(indices) not in [ListType,TupleType,N.ArrayType]:
+        indices = [indices]
+    if len(N.shape(a)) == 1:
+        cols = N.resize(a,[a.shape[0],1])
+    else:
+        cols = N.take(a,indices,axis)
+    return cols
+
+
+ def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
+    """
+Averages data in collapsecol, keeping all unique items in keepcols
+(using unique, which keeps unique LISTS of column numbers), retaining
+the unique sets of values in keepcols, the mean for each.  If stderror or
+N of the mean are desired, set either or both parameters to 1.
+
+Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+Returns: unique 'conditions' specified by the contents of columns specified
+         by keepcols, abutted with the mean(s) of column(s) specified by
+         collapsecols
+"""
+    def acollmean (inarray):
+        return N.sum(N.ravel(inarray))
+
+    if cfcn == None:
+        cfcn = acollmean
+    if keepcols == []:
+        avgcol = acolex(a,collapsecols)
+        means = N.sum(avgcol)/float(len(avgcol))
+        if fcn1<>None:
+            try:
+                test = fcn1(avgcol)
+            except:
+                test = N.array(['N/A']*len(means))
+            means = aabut(means,test)
+        if fcn2<>None:
+            try:
+                test = fcn2(avgcol)
+            except:
+                test = N.array(['N/A']*len(means))
+            means = aabut(means,test)
+        return means
+    else:
+        if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
+            keepcols = [keepcols]
+        values = colex(a,keepcols)   # so that "item" can be appended (below)
+        uniques = unique(values)  # get a LIST, so .sort keeps rows intact
+        uniques.sort()
+        newlist = []
+        for item in uniques:
+            if type(item) not in [ListType,TupleType,N.ArrayType]:
+                item =[item]
+            tmprows = alinexand(a,keepcols,item)
+            for col in collapsecols:
+                avgcol = acolex(tmprows,col)
+                item.append(acollmean(avgcol))
+                if fcn1<>None:
+                    try:
+                        test = fcn1(avgcol)
+                    except:
+                        test = 'N/A'
+                    item.append(test)
+                if fcn2<>None:
+                    try:
+                        test = fcn2(avgcol)
+                    except:
+                        test = 'N/A'
+                    item.append(test)
+                newlist.append(item)
+        try:
+            new_a = N.array(newlist)
+        except TypeError:
+            new_a = N.array(newlist,'O')
+        return new_a
+
+
+ def adm (a,criterion):
+    """
+Returns rows from the passed list of lists that meet the criteria in
+the passed criterion expression (a string as a function of x).
+
+Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
+"""
+    function = 'filter(lambda x: '+criterion+',a)'
+    lines = eval(function)
+    try:
+        lines = N.array(lines)
+    except:
+        lines = N.array(lines,'O')
+    return lines
+
+
+ def isstring(x):
+    if type(x)==StringType:
+        return 1
+    else:
+        return 0
+
+
+ def alinexand (a,columnlist,valuelist):
+    """
+Returns the rows of an array where col (from columnlist) = val
+(from valuelist).  One value is required for each column in columnlist.
+
+Usage:   alinexand (a,columnlist,valuelist)
+Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
+"""
+    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
+        columnlist = [columnlist]
+    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
+        valuelist = [valuelist]
+    criterion = ''
+    for i in range(len(columnlist)):
+        if type(valuelist[i])==StringType:
+            critval = '\'' + valuelist[i] + '\''
+        else:
+            critval = str(valuelist[i])
+        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
+    criterion = criterion[0:-3]         # remove the "and" after the last crit
+    return adm(a,criterion)
+
+
+ def alinexor (a,columnlist,valuelist):
+    """
+Returns the rows of an array where col (from columnlist) = val (from
+valuelist).  One value is required for each column in columnlist.
+The exception is if either columnlist or valuelist has only 1 value,
+in which case that item will be expanded to match the length of the
+other list.
+
+Usage:   alinexor (a,columnlist,valuelist)
+Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
+"""
+    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
+        columnlist = [columnlist]
+    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
+        valuelist = [valuelist]
+    criterion = ''
+    if len(columnlist) == 1 and len(valuelist) > 1:
+        columnlist = columnlist*len(valuelist)
+    elif len(valuelist) == 1 and len(columnlist) > 1:
+        valuelist = valuelist*len(columnlist)
+    for i in range(len(columnlist)):
+        if type(valuelist[i])==StringType:
+            critval = '\'' + valuelist[i] + '\''
+        else:
+            critval = str(valuelist[i])
+        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
+    criterion = criterion[0:-2]         # remove the "or" after the last crit
+    return adm(a,criterion)
+
+
+ def areplace (a,oldval,newval):
+    """
+Replaces all occurrences of oldval with newval in array a.
+
+Usage:   areplace(a,oldval,newval)
+"""
+    newa = N.not_equal(a,oldval)*a
+    return newa+N.equal(a,oldval)*newval
+
+
+ def arecode (a,listmap,col='all'):
+    """
+Remaps the values in an array to a new set of values (useful when
+you need to recode data from (e.g.) strings to numbers as most stats
+packages require.  Can work on SINGLE columns, or 'all' columns at once.
+
+Usage:   arecode (a,listmap,col='all')
+Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
+"""
+    ashape = a.shape
+    if col == 'all':
+        work = a.flat
+    else:
+        work = acolex(a,col)
+        work = work.flat
+    for pair in listmap:
+        if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O':
+            work = N.array(work,'O')
+            a = N.array(a,'O')
+            for i in range(len(work)):
+                if work[i]==pair[0]:
+                    work[i] = pair[1]
+            if col == 'all':
+                return N.reshape(work,ashape)
+            else:
+                return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
+        else:   # must be a non-Object type array and replacement
+            work = N.where(N.equal(work,pair[0]),pair[1],work)
+            return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
+
+
+ def arowcompare(row1, row2):
+    """
+Compares two rows from an array, regardless of whether it is an
+array of numbers or of python objects (which requires the cmp function).
+
+Usage:   arowcompare(row1,row2)
+Returns: an array of equal length containing 1s where the two rows had
+         identical elements and 0 otherwise
+"""
+    if row1.typecode()=='O' or row2.typecode=='O':
+        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
+    else:
+        cmpvect = N.equal(row1,row2)
+    return cmpvect
+
+
+ def arowsame(row1, row2):
+    """
+Compares two rows from an array, regardless of whether it is an
+array of numbers or of python objects (which requires the cmp function).
+
+Usage:   arowsame(row1,row2)
+Returns: 1 if the two rows are identical, 0 otherwise.
+"""
+    cmpval = N.alltrue(arowcompare(row1,row2))
+    return cmpval
+
+
+ def asortrows(a,axis=0):
+    """
+Sorts an array "by rows".  This differs from the Numeric.sort() function,
+which sorts elements WITHIN the given axis.  Instead, this function keeps
+the elements along the given axis intact, but shifts them 'up or down'
+relative to one another.
+
+Usage:   asortrows(a,axis=0)
+Returns: sorted version of a
+"""
+    if axis != 0:
+        a = N.swapaxes(a, axis, 0)
+    l = a.tolist()
+    l.sort()           # or l.sort(_sort)
+    y = N.array(l)
+    if axis != 0:
+        y = N.swapaxes(y, axis, 0)
+    return y
+
+
+ def aunique(inarray):
+    """
+Returns unique items in the FIRST dimension of the passed array. Only
+works on arrays NOT including string items.
+
+Usage:   aunique (inarray)
+"""
+    uniques = N.array([inarray[0]])
+    if len(uniques.shape) == 1:            # IF IT'S A 1D ARRAY
+        for item in inarray[1:]:
+            if N.add.reduce(N.equal(uniques,item).flat) == 0:
+                try:
+                    uniques = N.concatenate([uniques,N.array[N.NewAxis,:]])
+                except TypeError:
+                    uniques = N.concatenate([uniques,N.array([item])])
+    else:                                  # IT MUST BE A 2+D ARRAY
+        if inarray.typecode() != 'O':  # not an Object array
+            for item in inarray[1:]:
+                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
+                    try:
+                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
+                    except TypeError:    # the item to add isn't a list
+                        uniques = N.concatenate([uniques,N.array([item])])
+                else:
+                    pass  # this item is already in the uniques array
+        else:   # must be an Object array, alltrue/equal functions don't work
+            for item in inarray[1:]:
+                newflag = 1
+                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
+                    test = N.sum(abs(N.array(map(cmp,item,unq))))
+                    if test == 0:   # if item identical to any 1 row in uniques
+                        newflag = 0 # then not a novel item to add
+                        break
+                if newflag == 1:
+                    try:
+                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
+                    except TypeError:    # the item to add isn't a list
+                        uniques = N.concatenate([uniques,N.array([item])])
+    return uniques
+
+
+ def aduplicates(inarray):
+    """
+Returns duplicate items in the FIRST dimension of the passed array. Only
+works on arrays NOT including string items.
+
+Usage:   aunique (inarray)
+"""
+    inarray = N.array(inarray)
+    if len(inarray.shape) == 1:            # IF IT'S A 1D ARRAY
+        dups = []
+        inarray = inarray.tolist()
+        for i in range(len(inarray)):
+            if inarray[i] in inarray[i+1:]:
+                dups.append(inarray[i])
+        dups = aunique(dups)
+    else:                                  # IT MUST BE A 2+D ARRAY
+        dups = []
+        aslist = inarray.tolist()
+        for i in range(len(aslist)):
+            if aslist[i] in aslist[i+1:]:
+                dups.append(aslist[i])
+        dups = unique(dups)
+        dups = N.array(dups)
+    return dups
+
+except ImportError:    # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
+ pass

Orange/bioinformatics/stats.py

+# Copyright (c) 1999-2002 Gary Strangman; All Rights Reserved.
+#
+# This software is distributable under the terms of the GNU
+# General Public License (GPL) v2, the text of which can be found at
+# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
+# using this module constitutes acceptance of the terms of this License.
+#
+# Disclaimer
+# 
+# This software is provided "as-is".  There are no expressed or implied
+# warranties of any kind, including, but not limited to, the warranties
+# of merchantability and fittness for a given application.  In no event
+# shall Gary Strangman be liable for any direct, indirect, incidental,
+# special, exemplary or consequential damages (including, but not limited
+# to, loss of use, data or profits, or business interruption) however
+# caused and on any theory of liability, whether in contract, strict
+# liability or tort (including negligence or otherwise) arising in any way
+# out of the use of this software, even if advised of the possibility of
+# such damage.
+#
+# Comments and/or additions are welcome (send e-mail to:
+# strang@nmr.mgh.harvard.edu).
+# 
+"""
+stats.py module
+
+(Requires pstat.py module.)
+
+#################################################
+#######  Written by:  Gary Strangman  ###########
+#######  Last modified:  May 10, 2002 ###########
+#################################################
+
+A collection of basic statistical functions for python.  The function
+names appear below.
+
+IMPORTANT:  There are really *3* sets of functions.  The first set has an 'l'
+prefix, which can be used with list or tuple arguments.  The second set has
+an 'a' prefix, which can accept NumPy array arguments.  These latter
+functions are defined only when NumPy is available on the system.  The third
+type has NO prefix (i.e., has the name that appears below).  Functions of
+this set are members of a "Dispatch" class, c/o David Ascher.  This class
+allows different functions to be called depending on the type of the passed
+arguments.  Thus, stats.mean is a member of the Dispatch class and
+stats.mean(range(20)) will call stats.lmean(range(20)) while
+stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)).
+This is a handy way to keep consistent function names when different
+argument types require different functions to be called.  Having
+implementated the Dispatch class, however, means that to get info on
+a given function, you must use the REAL function name ... that is
+"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine,
+while "print stats.mean.__doc__" will print the doc for the Dispatch
+class.  NUMPY FUNCTIONS ('a' prefix) generally have more argument options
+but should otherwise be consistent with the corresponding list functions.
+
+Disclaimers:  The function list is obviously incomplete and, worse, the
+functions are not optimized.  All functions have been tested (some more
+so than others), but they are far from bulletproof.  Thus, as with any
+free software, no warranty or guarantee is expressed or implied. :-)  A
+few extra functions that don't appear in the list below can be found by
+interested treasure-hunters.  These functions don't necessarily have
+both list and array versions but were deemed useful
+
+CENTRAL TENDENCY:  geometricmean
+                   harmonicmean
+                   mean
+                   median
+                   medianscore
+                   mode
+
+MOMENTS:  moment
+          variation
+          skew
+          kurtosis
+          skewtest   (for Numpy arrays only)
+          kurtosistest (for Numpy arrays only)
+          normaltest (for Numpy arrays only)
+
+ALTERED VERSIONS:  tmean  (for Numpy arrays only)
+                   tvar   (for Numpy arrays only)
+                   tmin   (for Numpy arrays only)
+                   tmax   (for Numpy arrays only)
+                   tstdev (for Numpy arrays only)
+                   tsem   (for Numpy arrays only)
+                   describe
+
+FREQUENCY STATS:  itemfreq
+                  scoreatpercentile
+                  percentileofscore
+                  histogram
+                  cumfreq
+                  relfreq
+
+VARIABILITY:  obrientransform
+              samplevar
+              samplestdev
+              signaltonoise (for Numpy arrays only)
+              var
+              stdev
+              sterr
+              sem
+              z
+              zs
+              zmap (for Numpy arrays only)
+
+TRIMMING FCNS:  threshold (for Numpy arrays only)
+                trimboth
+                trim1
+                round (round all vals to 'n' decimals; Numpy only)
+
+CORRELATION FCNS:  covariance  (for Numpy arrays only)
+                   correlation (for Numpy arrays only)
+                   paired
+                   pearsonr
+                   spearmanr
+                   pointbiserialr
+                   kendalltau
+                   linregress
+
+INFERENTIAL STATS:  ttest_1samp
+                    ttest_ind
+                    ttest_rel
+                    chisquare
+                    ks_2samp
+                    mannwhitneyu
+                    ranksums
+                    wilcoxont
+                    kruskalwallish
+                    friedmanchisquare
+
+PROBABILITY CALCS:  chisqprob
+                    erfcc
+                    zprob
+                    ksprob
+                    fprob
+                    betacf
+                    gammln 
+                    betai
+
+ANOVA FUNCTIONS:  F_oneway
+                  F_value
+
+SUPPORT FUNCTIONS:  writecc
+                    incr
+                    sign  (for Numpy arrays only)
+                    sum
+                    cumsum
+                    ss
+                    summult
+                    sumdiffsquared
+                    square_of_sums
+                    shellsort
+                    rankdata
+                    outputpairedstats
+                    findwithin
+"""
+## CHANGE LOG:
+## ===========
+## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows
+## 02-05-10 ... fixed lchisqprob indentation (failed when df=even)
+## 00-12-28 ... removed aanova() to separate module, fixed licensing to
+##                   match Python License, fixed doc string & imports
+## 00-04-13 ... pulled all "global" statements, except from aanova()
+##              added/fixed lots of documentation, removed io.py dependency
+##              changed to version 0.5
+## 99-11-13 ... added asign() function
+## 99-11-01 ... changed version to 0.4 ... enough incremental changes now
+## 99-10-25 ... added acovariance and acorrelation functions
+## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors
+##              added aglm function (crude, but will be improved)
+## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to
+##                   all handle lists of 'dimension's and keepdims
+##              REMOVED ar0, ar2, ar3, ar4 and replaced them with around
+##              reinserted fixes for abetai to avoid math overflows
+## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to
+##                   handle multi-dimensional arrays (whew!)
+## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990)
+##              added anormaltest per same reference
+##              re-wrote azprob to calc arrays of probs all at once
+## 99-08-22 ... edited attest_ind printing section so arrays could be rounded
+## 99-08-19 ... fixed amean and aharmonicmean for non-error(!) overflow on
+##                   short/byte arrays (mean of #s btw 100-300 = -150??)
+## 99-08-09 ... fixed asum so that the None case works for Byte arrays
+## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays
+## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap)
+## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0])
+## 04/11/99 ... added asignaltonoise, athreshold functions, changed all
+##                   max/min in array section to N.maximum/N.minimum,
+##                   fixed square_of_sums to prevent integer overflow
+## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums
+## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions
+## 02/28/99 ... Fixed aobrientransform to return an array rather than a list
+## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!)
+## 01/13/99 ... CHANGED TO VERSION 0.3
+##              fixed bug in a/lmannwhitneyu p-value calculation
+## 12/31/98 ... fixed variable-name bug in ldescribe
+## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix)
+## 12/16/98 ... changed amedianscore to return float (not array) for 1 score
+## 12/14/98 ... added atmin and atmax functions
+##              removed umath from import line (not needed)
+##              l/ageometricmean modified to reduce chance of overflows (take
+##                   nth root first, then multiply)
+## 12/07/98 ... added __version__variable (now 0.2)
+##              removed all 'stats.' from anova() fcn
+## 12/06/98 ... changed those functions (except shellsort) that altered
+##                   arguments in-place ... cumsum, ranksort, ...
+##              updated (and fixed some) doc-strings
+## 12/01/98 ... added anova() function (requires NumPy)
+##              incorporated Dispatch class
+## 11/12/98 ... added functionality to amean, aharmonicmean, ageometricmean
+##              added 'asum' function (added functionality to N.add.reduce)
+##              fixed both moment and amoment (two errors)
+##              changed name of skewness and askewness to skew and askew
+##              fixed (a)histogram (which sometimes counted points <lowerlimit)
+
+import pstat               # required 3rd party module
+import math, string, copy  # required python modules
+from types import *
+
+__version__ = 0.5
+
+############# DISPATCH CODE ##############
+
+
+class Dispatch:
+    """
+The Dispatch class, care of David Ascher, allows different functions to
+be called depending on the argument types.  This way, there can be one
+function name regardless of the argument type.  To access function doc
+in stats.py module, prefix the function with an 'l' or 'a' for list or
+array arguments, respectively.  That is, print stats.lmean.__doc__ or
+print stats.amean.__doc__ or whatever.
+"""
+
+    def __init__(self, *tuples):
+        self._dispatch = {}
+        for func, types in tuples:
+            for t in types:
+                if t in self._dispatch.keys():
+                    raise ValueError, "can't have two dispatches on "+str(t)
+                self._dispatch[t] = func
+        self._types = self._dispatch.keys()
+
+    def __call__(self, arg1, *args, **kw):
+        if type(arg1) not in self._types:
+            raise TypeError, "don't know how to dispatch %s arguments" %  type(arg1)
+        return apply(self._dispatch[type(arg1)], (arg1,) + args, kw)
+
+
+##########################################################################
+########################   LIST-BASED FUNCTIONS   ########################
+##########################################################################
+
+### Define these regardless
+
+####################################
+#######  CENTRAL TENDENCY  #########
+####################################
+
+def lgeometricmean (inlist):
+    """
+Calculates the geometric mean of the values in the passed list.
+That is:  n-th root of (x1 * x2 * ... * xn).  Assumes a '1D' list.
+
+Usage:   lgeometricmean(inlist)
+"""
+    mult = 1.0
+    one_over_n = 1.0/len(inlist)
+    for item in inlist:
+        mult = mult * pow(item,one_over_n)
+    return mult
+
+
+def lharmonicmean (inlist):
+    """
+Calculates the harmonic mean of the values in the passed list.
+That is:  n / (1/x1 + 1/x2 + ... + 1/xn).  Assumes a '1D' list.
+
+Usage:   lharmonicmean(inlist)
+"""
+    sum = 0
+    for item in inlist:
+        sum = sum + 1.0/item
+    return len(inlist) / sum
+
+
+def lmean (inlist):
+    """
+Returns the arithematic mean of the values in the passed list.
+Assumes a '1D' list, but will function on the 1st dim of an array(!).
+
+Usage:   lmean(inlist)
+"""
+    sum = 0
+    for item in inlist:
+        sum = sum + item
+    return sum/float(len(inlist))
+
+
+def lmedian (inlist,numbins=1000):
+    """
+Returns the computed median value of a list of numbers, given the
+number of bins to use for the histogram (more bins brings the computed value
+closer to the median score, default number of bins = 1000).  See G.W.
+Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics.
+
+Usage:   lmedian (inlist, numbins=1000)
+"""
+    (hist, smallest, binsize, extras) = histogram(inlist,numbins) # make histog
+    cumhist = cumsum(hist)              # make cumulative histogram
+    for i in range(len(cumhist)):        # get 1st(!) index holding 50%ile score
+        if cumhist[i]>=len(inlist)/2.0:
+            cfbin = i
+            break
+    LRL = smallest + binsize*cfbin        # get lower read limit of that bin
+    cfbelow = cumhist[cfbin-1]
+    freq = float(hist[cfbin])                # frequency IN the 50%ile bin
+    median = LRL + ((len(inlist)/2.0 - cfbelow)/float(freq))*binsize  # median formula
+    return median
+
+
+def lmedianscore (inlist):
+    """
+Returns the 'middle' score of the passed list.  If there is an even
+number of scores, the mean of the 2 middle scores is returned.
+
+Usage:   lmedianscore(inlist)
+"""
+
+    newlist = copy.deepcopy(inlist)
+    newlist.sort()
+    if len(newlist) % 2 == 0:   # if even number of scores, average middle 2
+        index = len(newlist)/2  # integer division correct
+        median = float(newlist[index] + newlist[index-1]) /2
+    else:
+        index = len(newlist)/2  # int divsion gives mid value when count from 0
+        median = newlist[index]
+    return median
+
+
+def lmode(inlist):
+    """
+Returns a list of the modal (most common) score(s) in the passed
+list.  If there is more than one such score, all are returned.  The
+bin-count for the mode(s) is also returned.
+
+Usage:   lmode(inlist)
+Returns: bin-count for mode(s), a list of modal value(s)
+"""
+
+    scores = pstat.unique(inlist)
+    scores.sort()
+    freq = []
+    for item in scores:
+        freq.append(inlist.count(item))
+    maxfreq = max(freq)
+    mode = []
+    stillmore = 1
+    while stillmore:
+        try:
+            indx = freq.index(maxfreq)
+            mode.append(scores[indx])
+            del freq[indx]
+            del scores[indx]
+        except ValueError:
+            stillmore=0
+    return maxfreq, mode
+
+
+####################################
+############  MOMENTS  #############
+####################################
+
+def lmoment(inlist,moment=1):
+    """
+Calculates the nth moment about the mean for a sample (defaults to
+the 1st moment).  Used to calculate coefficients of skewness and kurtosis.
+
+Usage:   lmoment(inlist,moment=1)
+Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r)
+"""
+    if moment == 1:
+        return 0.0
+    else:
+        mn = mean(inlist)
+        n = len(inlist)
+        s = 0
+        for x in inlist:
+            s = s + (x-mn)**moment
+        return s/float(n)
+
+
+def lvariation(inlist):
+    """
+Returns the coefficient of variation, as defined in CRC Standard
+Probability and Statistics, p.6.
+
+Usage:   lvariation(inlist)
+"""
+    return 100.0*samplestdev(inlist)/float(mean(inlist))
+
+
+def lskew(inlist):
+    """
+Returns the skewness of a distribution, as defined in Numerical
+Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage:   lskew(inlist)
+"""
+    return moment(inlist,3)/pow(moment(inlist,2),1.5)
+
+
+def lkurtosis(inlist):
+    """
+Returns the kurtosis of a distribution, as defined in Numerical
+Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage:   lkurtosis(inlist)
+"""
+    return moment(inlist,4)/pow(moment(inlist,2),2.0)
+
+
+def ldescribe(inlist):
+    """
+Returns some descriptive statistics of the passed list (assumed to be 1D).
+
+Usage:   ldescribe(inlist)
+Returns: n, mean, standard deviation, skew, kurtosis
+"""
+    n = len(inlist)
+    mm = (min(inlist),max(inlist))
+    m = mean(inlist)
+    sd = stdev(inlist)
+    sk = skew(inlist)
+    kurt = kurtosis(inlist)
+    return n, mm, m, sd, sk, kurt
+
+
+####################################
+#######  FREQUENCY STATS  ##########
+####################################
+
+def litemfreq(inlist):
+    """
+Returns a list of pairs.  Each pair consists of one of the scores in inlist
+and it's frequency count.  Assumes a 1D list is passed.
+
+Usage:   litemfreq(inlist)
+Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
+"""
+    scores = pstat.unique(inlist)
+    scores.sort()
+    freq = []
+    for item in scores:
+        freq.append(inlist.count(item))
+    return pstat.abut(scores, freq)
+
+
+def lscoreatpercentile (inlist, percent):
+    """
+Returns the score at a given percentile relative to the distribution
+given by inlist.
+
+Usage:   lscoreatpercentile(inlist,percent)
+"""
+    if percent > 1:
+        print "\nDividing percent>1 by 100 in lscoreatpercentile().\n"
+        percent = percent / 100.0
+    targetcf = percent*len(inlist)
+    h, lrl, binsize, extras = histogram(inlist)
+    cumhist = cumsum(copy.deepcopy(h))
+    for i in range(len(cumhist)):
+        if cumhist[i] >= targetcf:
+            break
+    score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i)
+    return score
+
+
+def lpercentileofscore (inlist, score,histbins=10,defaultlimits=None):
+    """
+Returns the percentile value of a score relative to the distribution
+given by inlist.  Formula depends on the values used to histogram the data(!).
+
+Usage:   lpercentileofscore(inlist,score,histbins=10,defaultlimits=None)
+"""
+
+    h, lrl, binsize, extras = histogram(inlist,histbins,defaultlimits)
+    cumhist = cumsum(copy.deepcopy(h))
+    i = int((score - lrl)/float(binsize))
+    pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inlist)) * 100
+    return pct
+
+
+def lhistogram (inlist,numbins=10,defaultreallimits=None,printextras=0):
+    """
+Returns (i) a list of histogram bin counts, (ii) the smallest value
+of the histogram binning, and (iii) the bin width (the last 2 are not
+necessarily integers).  Default number of bins is 10.  If no sequence object
+is given for defaultreallimits, the routine picks (usually non-pretty) bins
+spanning all the numbers in the inlist.
+
+Usage:   lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0)
+Returns: list of bin values, lowerreallimit, binsize, extrapoints
+"""
+    if (defaultreallimits <> None):
+        if type(defaultreallimits) not in [ListType,TupleType] or len(defaultreallimits)==1: # only one limit given, assumed to be lower one & upper is calc'd
+            lowerreallimit = defaultreallimits
+            upperreallimit = 1.0001 * max(inlist)
+        else: # assume both limits given
+            lowerreallimit = defaultreallimits[0]
+            upperreallimit = defaultreallimits[1]
+        binsize = (upperreallimit-lowerreallimit)/float(numbins)
+    else:     # no limits given for histogram, both must be calc'd
+        estbinwidth=(max(inlist)-min(inlist))/float(numbins) + 1 # 1=>cover all
+        binsize = ((max(inlist)-min(inlist)+estbinwidth))/float(numbins)
+        lowerreallimit = min(inlist) - binsize/2 #lower real limit,1st bin
+    bins = [0]*(numbins)
+    extrapoints = 0
+    for num in inlist:
+        try:
+            if (num-lowerreallimit) < 0:
+                extrapoints = extrapoints + 1
+            else:
+                bintoincrement = int((num-lowerreallimit)/float(binsize))
+                bins[bintoincrement] = bins[bintoincrement] + 1
+        except:
+            extrapoints = extrapoints + 1
+    if (extrapoints > 0 and printextras == 1):
+        print '\nPoints outside given histogram range =',extrapoints
+    return (bins, lowerreallimit, binsize, extrapoints)
+
+
+def lcumfreq(inlist,numbins=10,defaultreallimits=None):
+    """
+Returns a cumulative frequency histogram, using the histogram function.
+
+Usage:   lcumfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+    h,l,b,e = histogram(inlist,numbins,defaultreallimits)
+    cumhist = cumsum(copy.deepcopy(h))
+    return cumhist,l,b,e
+
+
+def lrelfreq(inlist,numbins=10,defaultreallimits=None):
+    """
+Returns a relative frequency histogram, using the histogram function.
+
+Usage:   lrelfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+    h,l,b,e = histogram(inlist,numbins,defaultreallimits)
+    for i in range(len(h)):
+        h[i] = h[i]/float(len(inlist))
+    return h,l,b,e
+
+
+####################################
+#####  VARIABILITY FUNCTIONS  ######
+####################################
+
+def lobrientransform(*args):
+    """
+Computes a transform on input data (any number of columns).  Used to
+test for homogeneity of variance prior to running one-way stats.  From
+Maxwell and Delaney, p.112.
+
+Usage:   lobrientransform(*args)
+Returns: transformed data for use in an ANOVA
+"""
+    TINY = 1e-10
+    k = len(args)
+    n = [0.0]*k
+    v = [0.0]*k
+    m = [0.0]*k
+    nargs = []
+    for i in range(k):
+        nargs.append(copy.deepcopy(args[i]))
+        n[i] = float(len(nargs[i]))
+        v[i] = var(nargs[i])
+        m[i] = mean(nargs[i])
+    for j in range(k):
+        for i in range(n[j]):
+            t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2
+            t2 = 0.5*v[j]*(n[j]-1.0)
+            t3 = (n[j]-1.0)*(n[j]-2.0)
+            nargs[j][i] = (t1-t2) / float(t3)
+    check = 1
+    for j in range(k):
+        if v[j] - mean(nargs[j]) > TINY:
+            check = 0
+    if check <> 1:
+        raise ValueError, 'Problem in obrientransform.'
+    else:
+        return nargs
+
+
+def lsamplevar (inlist):
+    """
+Returns the variance of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample variance only).
+
+Usage:   lsamplevar(inlist)
+"""
+    n = len(inlist)
+    mn = mean(inlist)
+    deviations = []
+    for item in inlist:
+        deviations.append(item-mn)
+    return ss(deviations)/float(n)
+
+
+def lsamplestdev (inlist):
+    """
+Returns the standard deviation of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample stdev only).
+
+Usage:   lsamplestdev(inlist)
+"""
+    return math.sqrt(samplevar(inlist))
+
+
+def lvar (inlist):
+    """
+Returns the variance of the values in the passed list using N-1
+for the denominator (i.e., for estimating population variance).
+
+Usage:   lvar(inlist)
+"""
+    n = len(inlist)
+    mn = mean(inlist)
+    deviations = [0]*len(inlist)
+    for i in range(len(inlist)):
+        deviations[i] = inlist[i] - mn
+    return ss(deviations)/float(n-1)
+
+
+def lstdev (inlist):
+    """
+Returns the standard deviation of the values in the passed list
+using N-1 in the denominator (i.e., to estimate population stdev).
+
+Usage:   lstdev(inlist)
+"""
+    return math.sqrt(var(inlist))
+
+
+def lsterr(inlist):
+    """
+Returns the standard error of the values in the passed list using N-1
+in the denominator (i.e., to estimate population standard error).
+
+Usage:   lsterr(inlist)
+"""
+    return stdev(inlist) / float(math.sqrt(len(inlist)))
+
+
+def lsem (inlist):
+    """
+Returns the estimated standard error of the mean (sx-bar) of the
+values in the passed list.  sem = stdev / sqrt(n)
+
+Usage:   lsem(inlist)
+"""
+    sd = stdev(inlist)
+    n = len(inlist)
+    return sd/math.sqrt(n)
+
+
+def lz (inlist, score):
+    """
+Returns the z-score for a given input score, given that score and the
+list from which that score came.  Not appropriate for population calculations.
+
+Usage:   lz(inlist, score)
+"""
+    z = (score-mean(inlist))/samplestdev(inlist)
+    return z
+
+
+def lzs (inlist):
+    """
+Returns a list of z-scores, one for each score in the passed list.
+
+Usage:   lzs(inlist)
+"""
+    zscores = []
+    for item in inlist:
+        zscores.append(z(inlist,item))
+    return zscores
+
+
+####################################
+#######  TRIMMING FUNCTIONS  #######
+####################################
+
+def ltrimboth (l,proportiontocut):
+    """
+Slices off the passed proportion of items from BOTH ends of the passed
+list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost'
+10% of scores.  Assumes list is sorted by magnitude.  Slices off LESS if
+proportion results in a non-integer slice index (i.e., conservatively
+slices off proportiontocut).
+
+Usage:   ltrimboth (l,proportiontocut)
+Returns: trimmed version of list l
+"""
+    lowercut = int(proportiontocut*len(l))
+    uppercut = len(l) - lowercut
+    return l[lowercut:uppercut]
+
+
+def ltrim1 (l,proportiontocut,tail='right'):
+    """
+Slices off the passed proportion of items from ONE end of the passed
+list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
+10% of scores).  Slices off LESS if proportion results in a non-integer
+slice index (i.e., conservatively slices off proportiontocut).
+
+Usage:   ltrim1 (l,proportiontocut,tail='right')  or set tail='left'
+Returns: trimmed version of list l
+"""
+    if tail == 'right':
+        lowercut = 0
+        uppercut = len(l) - int(proportiontocut*len(l))
+    elif tail == 'left':
+        lowercut = int(proportiontocut*len(l))
+        uppercut = len(l)
+    return l[lowercut:uppercut]
+
+
+####################################
+#####  CORRELATION FUNCTIONS  ######
+####################################
+
+def lpaired(x,y):
+    """
+Interactively determines the type of data and then runs the
+appropriated statistic for paired group data.
+
+Usage:   lpaired(x,y)
+Returns: appropriate statistic name, value, and probability
+"""
+    samples = ''
+    while samples not in ['i','r','I','R','c','C']:
+        print '\nIndependent or related samples, or correlation (i,r,c): ',
+        samples = raw_input()
+
+    if samples in ['i','I','r','R']:
+        print '\nComparing variances ...',
+# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
+        r = obrientransform(x,y)
+        f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1))
+        if p<0.05:
+            vartype='unequal, p='+str(round(p,4))
+        else:
+            vartype='equal'
+        print vartype
+        if samples in ['i','I']:
+            if vartype[0]=='e':
+                t,p = ttest_ind(x,y,0)
+                print '\nIndependent samples t-test:  ', round(t,4),round(p,4)
+            else:
+                if len(x)>20 or len(y)>20:
+                    z,p = ranksums(x,y)
+                    print '\nRank Sums test (NONparametric, n>20):  ', round(z,4),round(p,4)
+                else:
+                    u,p = mannwhitneyu(x,y)
+                    print '\nMann-Whitney U-test (NONparametric, ns<20):  ', round(u,4),round(p,4)
+
+        else:  # RELATED SAMPLES
+            if vartype[0]=='e':
+                t,p = ttest_rel(x,y,0)
+                print '\nRelated samples t-test:  ', round(t,4),round(p,4)
+            else:
+                t,p = ranksums(x,y)
+                print '\nWilcoxon T-test (NONparametric):  ', round(t,4),round(p,4)
+    else:  # CORRELATION ANALYSIS
+        corrtype = ''
+        while corrtype not in ['c','C','r','R','d','D']:
+            print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
+            corrtype = raw_input()
+        if corrtype in ['c','C']:
+            m,b,r,p,see = linregress(x,y)
+            print '\nLinear regression for continuous variables ...'
+            lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]
+            pstat.printcc(lol)
+        elif corrtype in ['r','R']:
+            r,p = spearmanr(x,y)
+            print '\nCorrelation for ranked variables ...'
+            print "Spearman's r: ",round(r,4),round(p,4)
+        else: # DICHOTOMOUS
+            r,p = pointbiserialr(x,y)
+            print '\nAssuming x contains a dichotomous variable ...'
+            print 'Point Biserial r: ',round(r,4),round(p,4)
+    print '\n\n'
+    return None
+
+
+def lpearsonr(x,y):
+    """
+Calculates a Pearson correlation coefficient and the associated
+probability value.  Taken from Heiman's Basic Statistics for the Behav.
+Sci (2nd), p.195.
+
+Usage:   lpearsonr(x,y)      where x and y are equal-length lists
+Returns: Pearson's r value, two-tailed p-value
+"""
+    TINY = 1.0e-30
+    if len(x) <> len(y):
+        raise ValueError, 'Input values not paired in pearsonr.  Aborting.'
+    n = len(x)
+    x = map(float,x)
+    y = map(float,y)
+    xmean = mean(x)
+    ymean = mean(y)
+    r_num = n*(summult(x,y)) - sum(x)*sum(y)
+    r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))
+    r = (r_num / r_den)  # denominator already a float
+    df = n-2
+    t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+    prob = betai(0.5*df,0.5,df/float(df+t*t))
+    return r, prob
+
+
+def lspearmanr(x,y):
+    """
+Calculates a Spearman rank-order correlation coefficient.  Taken
+from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
+
+Usage:   lspearmanr(x,y)      where x and y are equal-length lists
+Returns: Spearman's r, two-tailed p-value
+"""
+    TINY = 1e-30
+    if len(x) <> len(y):
+        raise ValueError, 'Input values not paired in spearmanr.  Aborting.'
+    n = len(x)
+    rankx = rankdata(x)
+    ranky = rankdata(y)
+    dsq = sumdiffsquared(rankx,ranky)
+    rs = 1 - 6*dsq / float(n*(n**2-1))
+    t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))
+    df = n-2
+    probrs = betai(0.5*df,0.5,df/(df+t*t))  # t already a float
+# probability values for rs are from part 2 of the spearman function in
+# Numerical Recipies, p.510.  They are close to tables, but not exact. (?)
+    return rs, probrs
+
+
+def lpointbiserialr(x,y):
+    """
+Calculates a point-biserial correlation coefficient and the associated
+probability value.  Taken from Heiman's Basic Statistics for the Behav.
+Sci (1st), p.194.
+
+Usage:   lpointbiserialr(x,y)      where x,y are equal-length lists
+Returns: Point-biserial r, two-tailed p-value
+"""
+    TINY = 1e-30
+    if len(x) <> len(y):
+        raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr.  ABORTING.'
+    data = pstat.abut(x,y)
+    categories = pstat.unique(x)
+    if len(categories) <> 2:
+        raise ValueError, "Exactly 2 categories required for pointbiserialr()."
+    else:   # there are 2 categories, continue
+        codemap = pstat.abut(categories,range(2))
+        recoded = pstat.recode(data,codemap,0)
+        x = pstat.linexand(data,0,categories[0])
+        y = pstat.linexand(data,0,categories[1])
+        xmean = mean(pstat.colex(x,1))
+        ymean = mean(pstat.colex(y,1))
+        n = len(data)
+        adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))
+        rpb = (ymean - xmean)/samplestdev(pstat.colex(data,1))*adjust
+        df = n-2
+        t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
+        prob = betai(0.5*df,0.5,df/(df+t*t))  # t already a float
+        return rpb, prob
+
+
+def lkendalltau(x,y):
+    """
+Calculates Kendall's tau ... correlation of ordinal data.  Adapted
+from function kendl1 in Numerical Recipies.  Needs good test-routine.@@@
+
+Usage:   lkendalltau(x,y)
+Returns: Kendall's tau, two-tailed p-value
+"""
+    n1 = 0
+    n2 = 0
+    iss = 0
+    for j in range(len(x)-1):
+        for k in range(j,len(y)):
+            a1 = x[j] - x[k]
+            a2 = y[j] - y[k]
+            aa = a1 * a2
+            if (aa):             # neither list has a tie
+                n1 = n1 + 1
+                n2 = n2 + 1
+                if aa > 0:
+                    iss = iss + 1
+                else:
+                    iss = iss -1
+            else:
+                if (a1):
+                    n1 = n1 + 1
+                else:
+                    n2 = n2 + 1
+    tau = iss / math.sqrt(n1*n2)
+    svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))
+    z = tau / math.sqrt(svar)
+    prob = erfcc(abs(z)/1.4142136)
+    return tau, prob
+
+
+def llinregress(x,y):
+    """
+Calculates a regression line on x,y pairs.  
+
+Usage:   llinregress(x,y)      x,y are equal-length lists of x-y coordinates
+Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
+"""
+    TINY = 1.0e-20
+    if len(x) <> len(y):
+        raise ValueError, 'Input values not paired in linregress.  Aborting.'
+    n = len(x)
+    x = map(float,x)
+    y = map(float,y)
+    xmean = mean(x)
+    ymean = mean(y)
+    r_num = float(n*(summult(x,y)) - sum(x)*sum(y))
+    r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))
+    r = r_num / r_den
+    z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))
+    df = n-2
+    t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+    prob = betai(0.5*df,0.5,df/(df+t*t))
+    slope = r_num / float(n*ss(x) - square_of_sums(x))
+    intercept = ymean - slope*xmean
+    sterrest = math.sqrt(1-r*r)*samplestdev(y)
+    return slope, intercept, r, prob, sterrest
+
+
+####################################
+#####  INFERENTIAL STATISTICS  #####
+####################################
+
+def lttest_1samp(a,popmean,printit=0,name='Sample',writemode='a'):
+    """
+Calculates the t-obtained for the independent samples T-test on ONE group
+of scores a, given a population mean.  If printit=1, results are printed
+to the screen.  If printit='filename', the results are output to 'filename'
+using the given writemode (default=append).  Returns t-value, and prob.
+
+Usage:   lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')
+Returns: t-value, two-tailed prob
+"""
+    x = mean(a)
+    v = var(a)
+    n = len(a)
+    df = n-1
+    svar = ((n-1)*v)/float(df)
+    t = (x-popmean)/math.sqrt(svar*(1.0/n))
+    prob = betai(0.5*df,0.5,float(df)/(df+t*t))
+
+    if printit <> 0:
+        statname = 'Single-sample T-test.'
+        outputpairedstats(printit,writemode,
+                          'Population','--',popmean,0,0,0,
+                          name,n,x,v,min(a),max(a),
+                          statname,t,prob)
+    return t,prob
+
+
+def lttest_ind (a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):
+    """
+Calculates the t-obtained T-test on TWO INDEPENDENT samples of
+scores a, and b.  From Numerical Recipies, p.483.  If printit=1, results
+are printed to the screen.  If printit='filename', the results are output
+to 'filename' using the given writemode (default=append).  Returns t-value,
+and prob.
+
+Usage:   lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+    x1 = mean(a)
+    x2 = mean(b)
+    v1 = stdev(a)**2
+    v2 = stdev(b)**2
+    n1 = len(a)
+    n2 = len(b)
+    df = n1+n2-2
+    svar = ((n1-1)*v1+(n2-1)*v2)/float(df)
+    t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2))
+    prob = betai(0.5*df,0.5,df/(df+t*t))
+
+    if printit <> 0:
+        statname = 'Independent samples T-test.'
+        outputpairedstats(printit,writemode,
+                          name1,n1,x1,v1,min(a),max(a),
+                          name2,n2,x2,v2,min(b),max(b),
+                          statname,t,prob)
+    return t,prob
+
+
+def lttest_rel (a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a'):
+    """
+Calculates the t-obtained T-test on TWO RELATED samples of scores,
+a and b.  From Numerical Recipies, p.483.  If printit=1, results are
+printed to the screen.  If printit='filename', the results are output to
+'filename' using the given writemode (default=append).  Returns t-value,
+and prob.
+
+Usage:   lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+    if len(a)<>len(b):
+        raise ValueError, 'Unequal length lists in ttest_rel.'
+    x1 = mean(a)
+    x2 = mean(b)
+    v1 = var(a)
+    v2 = var(b)
+    n = len(a)
+    cov = 0
+    for i in range(len(a)):
+        cov = cov + (a[i]-x1) * (b[i]-x2)
+    df = n-1
+    cov = cov / float(df)
+    sd = math.sqrt((v1+v2 - 2.0*cov)/float(n))
+    t = (x1-x2)/sd
+    prob = betai(0.5*df,0.5,df/(df+t*t))
+
+    if printit <> 0:
+        statname = 'Related samples T-test.'
+        outputpairedstats(printit,writemode,
+                          name1,n,x1,v1,min(a),max(a),
+                          name2,n,x2,v2,min(b),max(b),
+                          statname,t,prob)
+    return t, prob
+
+
+def lchisquare(f_obs,f_exp=None):
+    """
+Calculates a one-way chi square for list of observed frequencies and returns
+the result.  If no expected frequencies are given, the total N is assumed to
+be equally distributed across all groups.
+
+Usage:   lchisquare(f_obs, f_exp=None)   f_obs = list of observed cell freq.
+Returns: chisquare-statistic, associated p-value
+"""
+    k = len(f_obs)                 # number of groups
+    if f_exp == None:
+        f_exp = [sum(f_obs)/float(k)] * len(f_obs) # create k bins with = freq.
+    chisq = 0
+    for i in range(len(f_obs)):
+        chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i])
+    return chisq, chisqprob(chisq, k-1)
+
+
+def lks_2samp (data1,data2):
+    """
+Computes the Kolmogorov-Smirnof statistic on 2 samples.  From
+Numerical Recipies in C, page 493.
+
+Usage:   lks_2samp(data1,data2)   data1&2 are lists of values for 2 conditions