ecrypto / md5.el

;;;  md5.el -- MD5 message digest algorithm

;; Copyright (C) 1998 Ray Jones

;; Author: Ray Jones,
;; Keywords: MD5, message digest
;; Created: 1998-04-27

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, you can either send email to this
;; program's maintainer or write to: The Free Software Foundation,
;; Inc.; 675 Massachusetts Avenue; Cambridge, MA 02139, USA.

;;; Commentary:

;; this is an implementation of the MD5 hashing algorithm as described
;; in RFC 1321.  Applied Cryptography, by Bruce Schneier, was used as
;; a reference, but it should be noted that the initialization values
;; for the chaining vectors in that book are in byte-reversed order,
;; as of the 4th printing.  the mixin constants are correct, though.
;; this code might be somewhat confusing at first (or second).  two
;; sources of confusion are likely: the fact that MD5 works in least
;; significant byte order on the data, and that this code represents
;; 32 bit numbers as two 16 bit numbers (most emacsen not being able
;; to go past 27 bits).
;; this code was first written to appear as close to the format in the
;; RFC, then adjusted to take advantage of the patterns in the message
;; index (v-idx, below, in md5-vectors).  originally, numbers were
;; passed around and operated on as pairs, but the algorithm can be
;; (and has been) made to operate independently on the two 16-bit
;; halves of the numbers, combining both halves only when doing adds
;; and circular shifts.  this keeps consing to a minimum (almost
;; none), and roughly doubles the speed compared to the equivalent
;; algorithm operating on numbers as pairs.
;; it also provides for greater security when hashing sensistive
;; strings, since less data is created and left for the GC to clean
;; up.
;; there should be a file called md5-old.el accompanying this file.
;; it is the original, slow, consful version of this code, and is
;; (hopefully) easier to understand.

(require 'cl)

(defun md5 (string)
  "return the md5 hash of a string, as a 128 bit string"
  (let* ((length (length string))
	 ;; md5 requires the message be padded to a length of 512*k +
	 ;; 64 (bits).  confusion source: we're working with bytes.
	 ;; padding is always done.
	 ;; 512 bits = 64 bytes, 64 bits = 8 bytes
	 (next-512 (+ 64 (logand (+ length 8) (lognot 63))))
	 (pad-bytes (- next-512 length 8))
	 (pad-string (make-string pad-bytes 0))
	 (len-string (make-string 8 0)))
    ;; message is constructed as:
    ;; original-message | pad | length-in-bits
    ;; pad is 10000... (bitwise)
    ;; length-in-bits is length before padding, and is 64 bits long

    ;; fill in the single bit of the pad
    (aset pad-string 0 (ash 1 7))

    ;; there's a slim chance of overflow when multiplying the length
    ;; by 8 to get the length in bits.  to avoid this, do some
    ;; slightly hairier math when writing the length into len-string.
    ;; also, it has to be LSB-first.  be still my aching brain.

    ;; LSB sucks.

    ;; only do the first 4 bytes, even though supposedly there are 8.
    ;; 32 bit emacsen think that (ash 40 -37) => 1
    ;; (supposed to be fixed in future releases)
    (dotimes (idx 4)
      (aset len-string idx (logand ?\xff
				   (ash length (- 3 (* idx 8))))))

    (let* ((concat-string (concat string pad-string len-string))
           (vecs (md5-string-to-32bit-vecs concat-string))) 
          (md5-vectors (car vecs) (cdr vecs))
        ;; clear out the concat-string and vectors, in case they are
        ;; sensitive
        (fillarray concat-string ?0)
	(fillarray (car vecs) 0)
	(fillarray (cdr vecs) 0)))))

(defun md5-string-to-32bit-vecs (string)
  "turn a string into 32 bit numbers, with high and low 16bit halves
in different vectors.
returned as \(cons vec-hi vec-lo\)."
  ;; emacs doesn't actually have 32 bits, in most implementations.
  ;; 32 bit numbers are represented as a pair of 16 bit numbers.

  ;; 4 chars per 32 bit number, in LSB-first!
  (let* ((veclen (/ (length string) 4))
	 (vec-hi (make-vector veclen 0))
	 (vec-lo (make-vector veclen 0))
	 (stridx 0))
    (dotimes (vecidx veclen)
      ;; MD5 integers are kept as two 16 bit words
      ;; note the LSB magic/annoyance
      (aset vec-hi vecidx (+ (ash (aref string (+ stridx 3)) 8)
			     (aref string (+ stridx 2))))
      (aset vec-lo vecidx (+ (ash (aref string (+ stridx 1)) 8)
			     (aref string (+ stridx 0))))
      (incf stridx 4))

    (cons vec-hi vec-lo)))

;; array of values for i=[1..64] => floor(2^32 * abs(sin(i)))
;; broken into two arrays, hi values and low
(defconst md5-t-hi
  [?\xd76a ?\xe8c7 ?\x2420 ?\xc1bd 
           ?\xf57c ?\x4787 ?\xa830 ?\xfd46
           ?\x6980 ?\x8b44 ?\xffff ?\x895c 
           ?\x6b90 ?\xfd98 ?\xa679 ?\x49b4
           ?\xf61e ?\xc040 ?\x265e ?\xe9b6 
           ?\xd62f ?\x0244 ?\xd8a1 ?\xe7d3
           ?\x21e1 ?\xc337 ?\xf4d5 ?\x455a 
           ?\xa9e3 ?\xfcef ?\x676f ?\x8d2a
           ?\xfffa ?\x8771 ?\x6d9d ?\xfde5 
           ?\xa4be ?\x4bde ?\xf6bb ?\xbebf
           ?\x289b ?\xeaa1 ?\xd4ef ?\x0488 
           ?\xd9d4 ?\xe6db ?\x1fa2 ?\xc4ac
           ?\xf429 ?\x432a ?\xab94 ?\xfc93 
           ?\x655b ?\x8f0c ?\xffef ?\x8584
           ?\x6fa8 ?\xfe2c ?\xa301 ?\x4e08 
           ?\xf753 ?\xbd3a ?\x2ad7 ?\xeb86])

(defconst md5-t-lo
  [?\xa478 ?\xb756 ?\x70db ?\xceee
           ?\x0faf ?\xc62a ?\x4613 ?\x9501
           ?\x98d8 ?\xf7af ?\x5bb1 ?\xd7be
           ?\x1122 ?\x7193 ?\x438e ?\x0821
           ?\x2562 ?\xb340 ?\x5a51 ?\xc7aa
           ?\x105d ?\x1453 ?\xe681 ?\xfbc8
           ?\xcde6 ?\x07d6 ?\x0d87 ?\x14ed
           ?\xe905 ?\xa3f8 ?\x02d9 ?\x4c8a
           ?\x3942 ?\xf681 ?\x6122 ?\x380c 
           ?\xea44 ?\xcfa9 ?\x4b60 ?\xbc70
           ?\x7ec6 ?\x27fa ?\x3085 ?\x1d05 
           ?\xd039 ?\x99e5 ?\x7cf8 ?\x5665
           ?\x2244 ?\xff97 ?\x23a7 ?\xa039
           ?\x59c3 ?\xcc92 ?\xf47d ?\x5dd1
           ?\x7e4f ?\xe6e0 ?\x4314 ?\x11a1
           ?\x7e82 ?\xf235 ?\xd2bb ?\xd391])

  (defun md5<<< (val-hi val-lo shift)
    "macro to circular shift val-(hi,lo)  by SHIFT bits"
    ;; shifts greater than 16 need to be handled by a swap, then a
    ;; smaller shift
    (if (>= shift 16)
	  (decf shift 16)
	  (if (= shift 0)
	      `(rotatef ,val-hi ,val-lo)
	    ;; swapped shift
	    `(let ((a (logand ?\xffff (logior (ash ,val-lo ,shift) (ash ,val-hi ,(- shift 16)))))
		   (b (logand ?\xffff (logior (ash ,val-hi ,shift) (ash ,val-lo ,(- shift 16))))))
	       (setq ,val-hi a
		     ,val-lo b))))
      `(let ((a (logand ?\xffff (logior (ash ,val-hi ,shift) (ash ,val-lo ,(- shift 16)))))
	     (b (logand ?\xffff (logior (ash ,val-lo ,shift) (ash ,val-hi ,(- shift 16))))))
	 (setq ,val-hi a
	       ,val-lo b)))))

  (defun md5-f (x y z)
    `(logior (logand ,x ,y)
             (logand (lognot ,x)

  (defun md5-g (x y z)
    `(logior (logand ,x ,z)
             (logand ,y (lognot ,z)))))

  (defun md5-h (x y z)
    `(logxor ,x ,y ,z)))

  (defun md5-i (x y z)
    `(logxor ,y
             (logior ,x
                     ;; this is normally a lognot, but that would set
                     ;; high bits, and there's no logand to clear them.
                     (logxor ,z ?\xffff)))))

  (defun md5-rewrite (fun w x y z shift)
    "consing reduced form of md5 common step.
requires v-offset, v-idx, vec-hi, vec-lo, t-idx to be defined at
calling point." 
    (flet ((add-lo (x)
		   (intern (concat (symbol-name x) "-lo")))
	   (add-hi (x)
		   (intern (concat (symbol-name x) "-hi"))))
      (let ((w-hi (add-hi w)) (w-lo (add-lo w))
	    (x-hi (add-hi x)) (x-lo (add-lo x))
	    (y-hi (add-hi y)) (y-lo (add-lo y))
	    (z-hi (add-hi z)) (z-lo (add-lo z)))

	   (setq ,w-hi (+ ,w-hi
			  ,(funcall fun x-hi y-hi z-hi)
			  (aref vec-hi (+ v-offset v-idx))
			  (aref md5-t-hi t-idx))
		 ,w-lo (+ ,w-lo
			  ,(funcall fun x-lo y-lo z-lo)
			  (aref vec-lo (+ v-offset v-idx))
			  (aref md5-t-lo t-idx)))

	   (setq ,w-hi (logand ?\xffff 
			       (+ ,w-hi 
				  (ash ,w-lo -16))))
	   (setq ,w-lo (logand ?\xffff ,w-lo))

	   ,(md5<<< w-hi w-lo shift)

	   (incf ,w-lo ,x-lo)

	   (setq ,w-hi (logand ?\xffff 
			       (+ ,w-hi
				  (ash ,w-lo -16))))
	   (setq ,w-lo (logand ?\xffff ,w-lo))

	   (incf t-idx))))))

(defun md5-vectors (vec-hi vec-lo)
  ;; initialize the chaining variables
  (let ((a-hi ?\x6745) (a-lo ?\x2301)
	(b-hi ?\xefcd) (b-lo ?\xab89)
	(c-hi ?\x98ba) (c-lo ?\xdcfe)
	(d-hi ?\x1032) (d-lo ?\x5476)
	(v-offset 0))
    (dotimes (count (/ (length vec-hi) 16))
      (let ((AA-hi a-hi) (BB-hi b-hi) (CC-hi c-hi) (DD-hi d-hi)
	    (AA-lo a-lo) (BB-lo b-lo) (CC-lo c-lo) (DD-lo d-lo)
	    (t-idx 0)
	    ((f (v1 v2 v3 v4 shift)
		     ,(md5-rewrite 'md5-f v1 v2 v3 v4 shift)
		   (incf v-idx))))
	  (setq v-idx 0)
	  (dotimes (count 4)
	    (f a b c d 7)
	    (f d a b c 12)
	    (f c d a b 17)
	    (f b c d a 22)))

	    ((g (v1 v2 v3 v4 shift)
		     ,(md5-rewrite 'md5-g v1 v2 v3 v4 shift)
		   (setq v-idx (logand ?\xf (+ v-idx 5))))))

	  (setq v-idx 1)
	  (dotimes (count 4)
	    (g a b c d 5)
	    (g d a b c 9)
	    (g c d a b 14)
	    (g b c d a 20)))

	    ((h (v1 v2 v3 v4 shift)
		     ,(md5-rewrite 'md5-h v1 v2 v3 v4 shift)
		   (setq v-idx (logand ?\xf (+ v-idx 3))))))
	  (setq v-idx 5)
	  (dotimes (count 4)
	    (h a b c d 4)
	    (h d a b c 11)
	    (h c d a b 16)
	    (h b c d a 23)))

	    ((i (v1 v2 v3 v4 shift)
		     ,(md5-rewrite 'md5-i v1 v2 v3 v4 shift)
		   (setq v-idx (logand ?\xf (+ v-idx 7))))))
	  (setq v-idx 0)
	  (dotimes (count 4)
	    (i a b c d 6)
	    (i d a b c 10)
	    (i c d a b 15)
	    (i b c d a 21)))

	(setq a-lo (+ AA-lo a-lo)
	      b-lo (+ BB-lo b-lo)
	      c-lo (+ CC-lo c-lo)
	      d-lo (+ DD-lo d-lo))
	(setq a-hi (logand ?\xffff
			   (+ AA-hi a-hi
			      (ash a-lo -16)))
	      b-hi (logand ?\xffff
			   (+ BB-hi b-hi
			      (ash b-lo -16)))
	      c-hi (logand ?\xffff
			   (+ CC-hi c-hi
			      (ash c-lo -16)))
	      d-hi (logand ?\xffff
			   (+ DD-hi d-hi
			      (ash d-lo -16))))

	(setq a-lo (logand ?\xffff a-lo)
	      b-lo (logand ?\xffff b-lo)
	      c-lo (logand ?\xffff c-lo)
	      d-lo (logand ?\xffff d-lo))

	(incf v-offset 16)))
    ;; write out LSB-first.  i feel ill.
    (mapconcat #'(lambda (x) (format "%02x%02x" (logand ?\xff x) (ash x -8)))
                a-lo a-hi
                b-lo b-hi
                c-lo c-hi
                d-lo d-hi)

;; clean up the namespace
  (fmakunbound 'md5-rewrite)
  (fmakunbound 'md5<<<)
  (fmakunbound 'md5-f)
  (fmakunbound 'md5-g)
  (fmakunbound 'md5-h)
  (fmakunbound 'md5-i))

(provide 'md5)