- - https://dbjergaard.github.io/posts/matasano_set_1.html
- - [Pearson's chi-squared test](https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test)
- - [Chi-square distribution introduction | Probability and Statistics | Khan Academy](https://www.youtube.com/watch?v=dXB3cUGnaxQ)
- - [Pearson's chi square test (goodness of fit) | Probability and Statistics | Khan Academy ](https://www.youtube.com/watch?v=2QeDRsxSF9M)
+ - https://dbjergaard.github.io/posts/matasano_set_1.html recommends a chi-squared test. It is a cool concept, but it is very slow in Python, so I would not recommend it.
+ - [Pearson's chi-squared test](https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test)
+ - [Chi-square distribution introduction | Probability and Statistics | Khan Academy](https://www.youtube.com/watch?v=dXB3cUGnaxQ)
+ - [Pearson's chi square test (goodness of fit) | Probability and Statistics | Khan Academy](https://www.youtube.com/watch?v=2QeDRsxSF9M)
+from collections import Counter
+from itertools import cycle
+# Expected relative frequency of each letter a-z, in alphabetical order. Source: http://jsbin.com/kaxoxajige/1/edit?js,output
+expected = [0.08167,0.01492,0.02782,0.04253,0.12702,0.02228,0.02015,0.06094,0.06966,0.00153,0.00772,
+0.04025,0.02406,0.06749,0.07507,0.01929,0.00095,0.05987,0.06327,0.09056,0.02758,0.00978,
+0.02360,0.00150,0.01974,0.00074]
+# Source: https://hflog.wordpress.com/2014/04/01/how-to-perform-a-chi-squared-goodness-of-fit-test-in-python
+ #Play with these values to adjust the error of the approximation.
+ step=upper_bound/resolution
+ while val<=upper_bound:
+ rolling_sum+=step*(val**(x-1)*2.7182818284590452353602874713526624977**(-val))
+ val+=(((-1)**k)*z**(s+k))/(math.factorial(k)*(s+k))
+ return 1-ilgf(k/2,x/2)/gf(k/2)
+def chisquare(observed_values,expected_values):  # Pearson chi-squared statistic of observed vs. expected values, returned together with chisquarecdf() of that statistic
+ for observed, expected in zip(observed_values, expected_values):  # pairs the values positionally; zip stops at the shorter input
+ test_statistic+=(float(observed)-float(expected))**2/float(expected)  # adds (O - E)^2 / E for this pair; NOTE(review): no initialisation of test_statistic is visible in this excerpt - confirm it starts at 0
+ df=len(observed_values)-1  # degrees of freedom: number of observed values minus one
+ return test_statistic, chisquarecdf(test_statistic,df)  # tuple (statistic, chisquarecdf(statistic, df)); chisquarecdf is not defined inside this block
+ message = message.lower().strip(" ")
+ for char in "abcdefghijklmnopqrstuvwxyz":
+ freq.append(float(message.count(char)) / length)
+def xor_mb(message, key):  # XOR every character of message with the characters of key, repeating key via cycle()
+ return''.join(chr(ord(m_byte)^ord(k_byte)) for m_byte,k_byte in zip(message, cycle(key)))  # zip ends when message is exhausted; the XORed characters are joined into one string
+printset = set(string.printable)
+messages = open("4.txt", 'r').readlines()
+ message = line.rstrip().decode('hex')
+ for key in range(0 ,256):
+ temp = xor_mb(message, chr(key))
+ temp_freq = frequency(temp)
+ chi, tt = chisquare(temp_freq, expected)