Commits

Eric Roshan Eisner  committed 644f92e Draft

unsafe branch: use direct copy on x86 machines

benchmark old ns/op new ns/op delta
BenchmarkHash1K 11441 10726 -6.25%
BenchmarkHash8K 90885 85156 -6.30%
BenchmarkShortHash 2138 2044 -4.40%

benchmark old MB/s new MB/s speedup
BenchmarkHash1K 89.50 95.46 1.07x
BenchmarkHash8K 90.13 96.20 1.07x

  • Participants
  • Parent commits e7e3280
  • Branches unsafe

Comments (0)

Files changed (3)

+// Copyright 2012 Eric Roshan-Eisner. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 386
+// +build !appengine
+
+package sha3
+
+import "unsafe"
+
+// readUint64 returns the first eight bytes of b reinterpreted in place as a
+// uint64, i.e. in the host's native byte order. It panics if len(b) < 8.
+func readUint64(b []byte) uint64 {
+	// Touch the last byte up front: &b[0] alone only proves len(b) >= 1, and
+	// the 8-byte load below would otherwise read past the end of a short slice.
+	_ = b[7]
+	return *(*uint64)(unsafe.Pointer(&b[0]))
+}
+
+// writeUint64 stores v into the first eight bytes of b with a single in-place
+// 8-byte store, i.e. in the host's native byte order. It panics if len(b) < 8.
+func writeUint64(b []byte, v uint64) {
+	// Touch the last byte up front: &b[0] alone only proves len(b) >= 1, and
+	// the 8-byte store below would otherwise overwrite memory past a short slice.
+	_ = b[7]
+	*(*uint64)(unsafe.Pointer(&b[0])) = v
+}
+
+// tableapply XORs one block of input from chunk into d.state, one 64-bit
+// word at a time, using an unrolled switch on d.blocksize rather than a loop.
+// Each case handles the words that only exist at that block size and then
+// falls through to the next smaller size, so the words are processed from the
+// highest offset down to 0. A blocksize other than 144, 136, 104 or 72 matches
+// no case and leaves the state untouched.
+//
+// Alternative to looping, but only provides 1% benchmark speedup.
+func (d *keccak) tableapply(chunk []byte) {
+	switch d.blocksize {
+	case 144: // 224 bit
+		d.state[136/8] ^= readUint64(chunk[136:])
+		fallthrough
+	case 136: // 256 bit
+		d.state[128/8] ^= readUint64(chunk[128:])
+		d.state[120/8] ^= readUint64(chunk[120:])
+		d.state[112/8] ^= readUint64(chunk[112:])
+		d.state[104/8] ^= readUint64(chunk[104:])
+		fallthrough
+	case 104: // 384 bit
+		d.state[96/8] ^= readUint64(chunk[96:])
+		d.state[88/8] ^= readUint64(chunk[88:])
+		d.state[80/8] ^= readUint64(chunk[80:])
+		d.state[72/8] ^= readUint64(chunk[72:])
+		fallthrough
+	case 72: // 512 bit
+		d.state[64/8] ^= readUint64(chunk[64:])
+		d.state[56/8] ^= readUint64(chunk[56:])
+		d.state[48/8] ^= readUint64(chunk[48:])
+		d.state[40/8] ^= readUint64(chunk[40:])
+		d.state[32/8] ^= readUint64(chunk[32:])
+		d.state[24/8] ^= readUint64(chunk[24:])
+		d.state[16/8] ^= readUint64(chunk[16:])
+		d.state[8/8] ^= readUint64(chunk[8:])
+		d.state[0/8] ^= readUint64(chunk[0:])
+	}
+}
+// Copyright 2012 Eric Roshan-Eisner. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine
+// +build !386 appengine
+
+package sha3
+
+// readUint64 decodes the first eight bytes of b as a little-endian uint64.
+// It panics if len(b) < 8.
+func readUint64(b []byte) uint64 {
+	// One up-front check on the highest index lets the compiler drop the
+	// per-byte bounds checks on the ascending reads below.
+	_ = b[7]
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+// writeUint64 encodes v into the first eight bytes of b in little-endian
+// order. It panics if len(b) < 8, in which case b is left unmodified.
+func writeUint64(b []byte, v uint64) {
+	// Check the highest index first so a short slice panics before any byte is
+	// written, and so the stores below need no further bounds checks.
+	_ = b[7]
+	b[0] = byte(v)
+	b[1] = byte(v >> 8)
+	b[2] = byte(v >> 16)
+	b[3] = byte(v >> 24)
+	b[4] = byte(v >> 32)
+	b[5] = byte(v >> 40)
+	b[6] = byte(v >> 48)
+	b[7] = byte(v >> 56)
+}
 package sha3
 
 import (
-	"encoding/binary"
 	"hash"
 )
 
 // KeccakF[1600] permutation.
 func (d *keccak) absorb(chunk []byte) {
+	// XOR the first d.blocksize bytes of chunk into the state, eight bytes
+	// per 64-bit word, then call block on the updated state.
 	for i := 0; i < d.blocksize; i += 8 {
-		d.state[i/8] ^= binary.LittleEndian.Uint64(chunk[i : i+8])
+		d.state[i/8] ^= readUint64(chunk[i:])
 	}
 	block(&d.state)
 }
 	d.buf[d.blocksize-1] |= 0x80
 
 	for i := 0; i < d.blocksize; i += 8 {
-		d.state[i/8] ^= binary.LittleEndian.Uint64(d.buf[i : i+8])
+		d.state[i/8] ^= readUint64(d.buf[i:])
 	}
 	d.closed = true
 }
 	}
 	block(&d.state)
 	for i := 0; i < d.blocksize; i += 8 {
-		binary.LittleEndian.PutUint64(d.buf[i:i+8], d.state[i/8])
+		writeUint64(d.buf[i:], d.state[i/8])
 	}
 }