Source

rules-engine / rules_test.py

'''Usage: rules_test <reference-file> <your-results>'''

import csv, sys, math

# Exactly two command-line arguments are required (the reference file and the
# results file); otherwise print the module docstring as usage text and exit
# by passing -1 to sys.exit.
if len(sys.argv[1:]) != 2:
    print(__doc__)
    sys.exit(-1)

def num(s):
    '''Interpret a cell value as a flag.

    Returns False when `s` is empty (or otherwise falsy) or is exactly the
    string '0'; returns True for anything else.
    '''
    return bool(s) and s != '0'

# Compare a results CSV against a reference CSV and print a small report:
# how many rows agree, how many are false positives / false negatives, and
# the correlation between the reference flags and the guessed flags.
reference_file, results_file = sys.argv[1:]


def _report(label, value):
    '''Print `label` and `value` separated by one space (same output as `print label, value`).'''
    print('%s %s' % (label, value))


# Read the results file first, keyed by the value in each row's first column.
with open(results_file) as results_fp:
    results = csv.reader(results_fp)
    results_header = next(results)
    results_id = results_header[0]  # name of the id column; not used below
    # csv.reader yields [] for blank lines; skip them instead of failing on row[0].
    results_data = dict((row[0], row) for row in results if row)

# Confusion-matrix counts; the first letter is the reference ("should be"),
# the second is the result ("guessed as"):
#   yy = true positives, nn = true negatives,
#   yn = false negatives (should be yes, guessed no),
#   ny = false positives (should be no, guessed yes).
# cx, cy, cxx, cyy, cxy are the running sums of x, y, x*x, y*y and x*y, where
# x is the reference flag and y is the guessed flag (each 0 or 1).
yy, yn, ny, nn, cxy, cxx, cyy, cx, cy, n = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

# Compare with the reference file. Just a single parameter for now: the
# reference column whose header equals the header of the results file's
# second column (list.index raises ValueError if there is no such column).
with open(reference_file) as reference_fp:
    reference = csv.reader(reference_fp)
    reference_header = next(reference)
    index = reference_header.index(results_header[1])

    for row in reference:
        if not row:
            continue  # blank line in the reference file

        row_id = row[0]
        if row_id not in results_data:
            sys.exit('No result found for id %r' % (row_id,))

        should_be = num(row[index])
        guessed_as = num(results_data[row_id][1])

        if should_be and guessed_as:
            yy += 1
        elif should_be:
            yn += 1
        elif guessed_as:
            ny += 1
        else:
            nn += 1

        # num() returns a bool, so int() gives the 0/1 value of each flag.
        x, y = int(should_be), int(guessed_as)
        cx += x
        cy += y
        cxx += x * x
        cyy += y * y
        cxy += x * y

        n += 1

_report('Correctly identified: ', yy + nn)
_report('False positives: ', ny)
_report('False negatives: ', yn)

# sxy is the product of the two (unnormalised) variances. It is zero when
# either column is constant or when no rows were compared, in which case the
# correlation is undefined.
sxy = (n*cxx - cx*cx)*(n*cyy - cy*cy)
if sxy > 0:
    _report('Correlation: ', (n*cxy - cx * cy) / math.sqrt(sxy))
else:
    print('Correlation: undefined (your results have a constant result)')