# hachoir / hachoir-tools / entropy.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74``` ```#!/usr/bin/python from math import log class Entropy: def __init__(self): self.frequence = dict( (chr(index), 0) for index in xrange(0, 256) ) self.count = 0 def readBytes(self, bytes): for byte in bytes: self.frequence[byte] = self.frequence[byte] + 1 self.count += len(bytes) return self def compute(self): h = 0 for value in self.frequence.itervalues(): if not value: continue p_i = float(value) / self.count h -= p_i * log(p_i, 2) return h from time import time from sys import stderr class EntropyFile(Entropy): def __init__(self): Entropy.__init__(self) self.progress_time = 1.0 self.buffer_size = 4096 def displayProgress(self, percent): print >>stderr, "Progress: %.1f%%" % percent def readStream(self, stream, streamsize=None): # Read stream size if streamsize is None: stream.seek(0, 2) streamsize = stream.tell() if streamsize <= 0: raise ValueError("Empty stream") # Read stream content stream.seek(0,0) next_msg = time() + self.progress_time while True: if next_msg <= time(): self.displayProgress(stream.tell() * 100.0 / streamsize) next_msg = time() + self.progress_time raw = stream.read(self.buffer_size) if not raw: break self.readBytes(raw) return self def readFile(self, filename): stream = open(filename, 'rb') self.readStream(stream) return self def main(): from sys import argv, exit if len(argv) != 2: print >>stderr, "usage: %s filename" % argv[0] exit(1) entropy = EntropyFile() entropy.readFile(argv[1]) print "Entropy: %.4f bit/byte" % entropy.compute() exit(0) if __name__ == "__main__": main() ```