Source

evolve-keyboard-layout / regularity_check.py

Full commit
#!/usr/bin/env python3
# encoding: utf-8

u"""Check the regularity of a keyboard layout for a reference textfile."""

from __future__ import with_statement
from __future__ import division
from optparse import OptionParser
from io import open

### config

#: Number of characters read per snippet of the reference text.
#: The value is arbitrary (roughly two twitter messages).
segment_length = 270

#: Default file for the per-snippet costs (override with -o / --output).
output = u"res.txt"
#: Default file for the per-word costs (override with -w / --words-output).
output_words = u"res-words.txt"

#: Default reference text (override with -t / --textfile).
textfile = u"beispieltext-prosa.txt"

#: Echo the results on the console?
verbose = False

#: Layout used when neither --layout nor --layout_name is given.
#: One string per keyboard row, joined with newlines.
LAYOUT = u"\n".join((
    u"xvlcw khgfqß´",
    u"uiaeo snrtdy",
    u"üöäpz bm,.j",
))

### predefined layouts
#
# Each layout is a unicode string with one line per keyboard row. All of them
# carry the u prefix: without it, Python 2 would store the non-ASCII keys
# (ä, ö, ü, ß, ´, ⇚) as raw UTF-8 bytes instead of single characters.

Neo2 = u"""xvlcw khgfqß´
uiaeo snrtdy
üöäpz bm,.j"""

Qwertz = u"""qwert zuiopü+
asdfg hjklöä
yxcvb nm,.-"""

NordTast = u"""äuobp kglmfx´
aietc hdnrsß
.,üöq yzwvj"""

Andreas100504 = u"""jäo.ü khclfv´
teaiu gdnrsß
xqö,y bpmwz"""

Vrijbuiter = u"""joä,ü khclfv´
taeiu gdnrsß
xöq.y bpmwz"""

fiae = u"""xuc.ö vdsljq´
fiaeo mtrnhk
,üzäy bgßwp"""

haeiu = u"""xzo., pcslvß´
haeiu dtrnmf
⇚kyäüö bgjqw"""

AdNW = u"""kuü.ä vgcljf´
hieao dtrnsß
⇚xyö,q bpwmz"""


### Parse console arguments

parser = OptionParser(usage = u"script to check the regularity of the layout for a reference textfile", version = u"0.1")
parser.add_option(u"-l", u"--layout", type=u"string", dest=u"layout", default=LAYOUT, help=u"the layout to use")
parser.add_option(u"-n", u"--layout_name", type=u"string", dest=u"layout_name", default=None, help=u"the predefined layout to use, given by name (Neo, Qwertz, …)")
parser.add_option(u"-o", u"--output", type=u"string", dest=u"output", default=output, help=u"the file to use for the output")
parser.add_option(u"-w", u"--words-output", type=u"string", dest=u"output_words", default=output_words, help=u"the file to use for the output of the word statistics")
parser.add_option(u"-t", u"--textfile", type=u"string", dest=u"textfile", default=textfile, help=u"the file with the reference text")
parser.add_option(u"-v", u"--verbose", action=u"store_true", default=False, help=u"echo the results on the console")

(options, args) = parser.parse_args()

if options.layout_name is not None:
    try: 
        options.layout = eval(options.layout_name)
    except NameError:
        print u"the layout", options.layout_name, u"is not predefined. Please use --layout to give it as string."
        exit()

### run

# Imported only here, after the command line has been parsed.
# NOTE(review): presumably deferred on purpose (e.g. so that --help and
# argument errors are handled before check_neo is loaded) — confirm before
# moving this import to the top of the file.
# read_file is imported but not used in the part of the file visible here.
from check_neo import string_to_layout, total_cost, get_all_data, read_file

# Convert the layout string chosen on the command line (or its default) into
# the layout object that is handed to total_cost() by check().
layout = string_to_layout(options.layout)

def check(layout=layout, verbose=False, data=None):
    u"""Return the total cost per letter of a layout for a reference text.

    :param layout: the layout to rate; defaults to the layout selected at startup.
    :param verbose: accepted for compatibility, not used by this function.
    :param data: the reference text, passed on to get_all_data() unchanged.
    :returns: the total cost reported by total_cost() divided by the number of letters.
    """
    # Names with a leading underscore are unpacked only to keep the expected
    # shape of the results explicit; they are not used further.
    (letters, letter_count, repeats, _bigram_count,
     trigrams, _trigram_count) = get_all_data(data=data)

    # NOTE(review): a weighted variant using total_cost(..., return_weighted=True)
    # was sketched here in a commented-out line; it is not active.
    cost_parts = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams)
    (total, _frep_num, _cost, _frep_top_bottom, _disbalance,
     _no_handswitches, _line_change_same_hand) = cost_parts[:7]

    return total / letter_count

def std(numbers):
    u"""Return the sample standard deviation of a sequence of numbers.

    The squared deviations from the mean are divided by (n - 1). This simple
    estimate is only meaningful for roughly a hundred values or more; it is
    used for fewer values here as well, which is acceptable because the
    result only serves as an arbitrary metric.

    An empty sequence or a single value gives 0.0.

    >>> std([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]*10)
    1.607945243653783
    """
    from math import sqrt

    count = float(len(numbers))
    # max(1, ...) guards both divisions against an empty or one-element input.
    average = sum(numbers) / max(1, count)

    squared_deviations = 0
    for value in numbers:
        squared_deviations += (value - average)**2

    return sqrt(squared_deviations / max(1, (count - 1)))


# processing and output (interleaved to be able to read really big files incrementally)
f = open(options.textfile, u"r")
# clear the output file
fout = open(options.output, u"w")
fout.write(u"")
fout.close()

res = []
d = f.read(segment_length)
while d:
    cost = check(layout=layout, data=d)
    d = f.read(segment_length)
    if options.verbose:
        print cost
    with open(options.output, u"a") as fout: 
        fout.write(unicode(cost) + u"\n")
    res.append(cost)

f.close()
fout.close()

# same for words
with open(options.textfile, u"r") as f: 
    data = f.read()

f = open(options.textfile, u"r")
# clear the output file
fout = open(options.output_words, u"w")
fout.write(u"")
fout.close()

res_words = []
d = f.read(100*segment_length)
while d:
    res_tmp = []
    for word in d.split():
        if word:
            cost = check(layout=layout, data=word)
            res_tmp.append(cost)
            if options.verbose:
                print cost
    with open(options.output_words, u"a") as fout: 
        fout.writelines([unicode(cost) + u"\n" for cost in res_tmp])
    res_words.extend(res_tmp)
    d = f.read(100*segment_length)

        
f.close()
fout.close()

print u"mean value and standard deviation of the layout cost:"
print u"snippets of", segment_length, u"letters:", sum(res)/len(res), u"±", std(res)
print u"words:", sum(res_words)/len(res_words), u"±", std(res)