# whoosh / src / whoosh / support / levenshtein.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78``` ```""" Contains functions implementing edit distance algorithms. """ from whoosh.compat import xrange def levenshtein(seq1, seq2, limit=None): """ Returns the Levenshtein edit distance between two strings. """ oneago = None thisrow = list(xrange(1, len(seq2) + 1)) + [0] for x in xrange(len(seq1)): # Python lists wrap around for negative indices, so put the # leftmost column at the *end* of the list. This matches with # the zero-indexed strings and saves extra calculation. oneago, thisrow = thisrow, [0] * len(seq2) + [x + 1] for y in xrange(len(seq2)): delcost = oneago[y] + 1 addcost = thisrow[y - 1] + 1 subcost = oneago[y - 1] + (seq1[x] != seq2[y]) thisrow[y] = min(delcost, addcost, subcost) if limit and x > limit and min(thisrow) > limit: return limit + 1 return thisrow[len(seq2) - 1] def damerau_levenshtein(seq1, seq2, limit=2, prefix=0): """ Returns the Damerau-Levenshtein edit distance between two strings. """ if prefix and seq1[:prefix] != seq2[:prefix]: return 0, limit + 1 oneago = None thisrow = list(range(1, len(seq2) + 1)) + [0] x = 0 for x in xrange(len(seq1)): # Python lists wrap around for negative indices, so put the # leftmost column at the *end* of the list. This matches with # the zero-indexed strings and saves extra calculation. twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1] for y in xrange(len(seq2)): delcost = oneago[y] + 1 addcost = thisrow[y - 1] + 1 subcost = oneago[y - 1] + (seq1[x] != seq2[y]) thisrow[y] = min(delcost, addcost, subcost) # This block deals with transpositions if (x > 0 and y > 0 and seq1[x] == seq2[y - 1] and seq1[x - 1] == seq2[y] and seq1[x] != seq2[y]): thisrow[y] = min(thisrow[y], twoago[y - 2] + 1) lowest = min(thisrow) if lowest > limit: return limit + 1 return thisrow[len(seq2) - 1] def relative(a, b): """ Returns the relative distance between two strings, in the range [0-1] where 1 means total equality. """ d = distance(a, b) longer = float(max((len(a), len(b)))) shorter = float(min((len(a), len(b)))) r = ((longer - d) / longer) * (shorter / longer) return r distance = damerau_levenshtein ```