Commits

Boolsheet  committed 54b0f79

Lookup tables are now activated with the lut function.

  • Participants
  • Parent commits 3609071

Comments (0)

Files changed (1)

 	They assume unsigned 32-bit integers for all parameters. The functions
 	with the suffix '8' expect unsigned 8-bit integers.
 
-	Setting the global variable BIT_USE_LOOKUP_TABLES to true before loading
-	this script activates the lookup table generation.
+	The function bit_lua.lut activates the lookup tables. They increase the
+	speed further at the cost of around 277 KiB of memory.
 
 
 	Function list:
 	bit_lua.lrotate(int, by)
 	bit_lua.rrotate(int, by)
 
+	bit_lua.lut() - Generates lookup tables and replaces the band, bor, and bxor
+	                functions (and their '8' variants) with lookup versions.
+
 ]]
 
 assert((2^48 + 2) - (2^48 + 1) == 1, "Bitwise operations require Lua to be compiled with double precision.")
 for i = 0, 32 do
 	POT_LUT[i] = 2^i
 end
-
+local AND_LUT, OR_LUT, XOR_LUT
 local band, bor, bxor
 local band8, bor8, bxor8
+local lu_band, lu_bor, lu_bxor
+local lu_band8, lu_bor8, lu_bxor8
+
 
 local bit_lua = {}
 
 	return bxor8(byte, byte3, ...)
 end
 
+lu_band = function(int1, int2, int3, ...)
+	local a1 = int1 % 256
+	int1 = (int1 - a1) / 256
+	local a2 = int1 % 256
+	int1 = (int1 - a2) / 256
+	local a3 = int1 % 256
+	int1 = (int1 - a3) / 256
+	local a4 = int1 % 256
 
--- The lookup tables use about 277 KiB. It's almost 4 times faster in my tests.
-if BIT_USE_LOOKUP_TABLES then
-	local AND_LT, OR_LT, XOR_LT = {}, {}, {}
+	local b1 = int2 % 256
+	int2 = (int2 - b1) / 256
+	local b2 = int2 % 256
+	int2 = (int2 - b2) / 256
+	local b3 = int2 % 256
+	int2 = (int2 - b3) / 256
+	local b4 = int2 % 256
+
+	local ret = AND_LUT[a1][b1] + AND_LUT[a2][b2] * 0x100 + AND_LUT[a3][b3] * 0x10000 + AND_LUT[a4][b4] * 0x1000000
+
+	if not int3 then
+		return ret
+	end
+
+	return lu_band(ret, int3, ...)
+end
+
+lu_bor = function(int1, int2, int3, ...)
+	local a1 = int1 % 256
+	int1 = (int1 - a1) / 256
+	local a2 = int1 % 256
+	int1 = (int1 - a2) / 256
+	local a3 = int1 % 256
+	int1 = (int1 - a3) / 256
+	local a4 = int1 % 256
+
+	local b1 = int2 % 256
+	int2 = (int2 - b1) / 256
+	local b2 = int2 % 256
+	int2 = (int2 - b2) / 256
+	local b3 = int2 % 256
+	int2 = (int2 - b3) / 256
+	local b4 = int2 % 256
+
+	local ret = OR_LUT[a1][b1] + OR_LUT[a2][b2] * 0x100 + OR_LUT[a3][b3] * 0x10000 + OR_LUT[a4][b4] * 0x1000000
+
+	if not int3 then
+		return ret
+	end
+
+	return lu_bor(ret, int3, ...)
+end
+
+lu_bxor = function(int1, int2, int3, ...)
+	local a1 = int1 % 256
+	int1 = (int1 - a1) / 256
+	local a2 = int1 % 256
+	int1 = (int1 - a2) / 256
+	local a3 = int1 % 256
+	int1 = (int1 - a3) / 256
+	local a4 = int1 % 256
+
+	local b1 = int2 % 256
+	int2 = (int2 - b1) / 256
+	local b2 = int2 % 256
+	int2 = (int2 - b2) / 256
+	local b3 = int2 % 256
+	int2 = (int2 - b3) / 256
+	local b4 = int2 % 256
+
+	local ret = XOR_LUT[a1][b1] + XOR_LUT[a2][b2] * 0x100 + XOR_LUT[a3][b3] * 0x10000 + XOR_LUT[a4][b4] * 0x1000000
+
+	if not int3 then
+		return ret
+	end
+
+	return lu_bxor(ret, int3, ...)
+end
+
+lu_band8 = function(byte1, byte2, byte3, ...)
+	byte2 = AND_LUT[byte1][byte2]
+
+	if not byte3 then
+		return byte2
+	end
+
+	return lu_band8(byte2, byte3, ...)
+end
+
+lu_bor8 = function(byte1, byte2, byte3, ...)
+	byte2 = OR_LUT[byte1][byte2]
+
+	if not byte3 then
+		return byte2
+	end
+
+	return lu_bor8(byte2, byte3, ...)
+end
+
+lu_bxor8 = function(byte1, byte2, byte3, ...)
+	byte2 = XOR_LUT[byte1][byte2]
+
+	if not byte3 then
+		return byte2
+	end
+
+	return lu_bxor8(byte2, byte3, ...)
+end
+
+bit_lua.bnot = function(int)
+	return 4294967295 - int
+end
+
+bit_lua.bnot8 = function(byte)
+	return 255 - byte
+end
+
+
+bit_lua.lshift = function(int, by)
+	return (int * POT_LUT[by]) % POW32
+end
+
+bit_lua.rshift = function(int, by)
+	local shifted = int / POT_LUT[by]
+	return shifted - shifted % 1
+end
+
+bit_lua.arshift = function(int, by)
+	local pf = POT_LUT[by]
+	local shifted = int / pf
+	return shifted - shifted % 1 + ((int >= POW31 and (pf - 1) * POT_LUT[32-by]) or 0)
+end
+
+
+bit_lua.lrotate = function(int, by)
+	local shifted = int / POT_LUT[32-by]
+	local fraction = shifted % 1
+	return (shifted - fraction) + fraction * POW32
+end
+
+bit_lua.rrotate = function(int, by)
+	local shifted = int / POT_LUT[by]
+	local fraction = shifted % 1
+	return (shifted - fraction) + fraction * POW32
+end
+
+
+local function generate_lookup_tables()
+	if AND_LUT and OR_LUT and XOR_LUT then
+		return
+	end
+
+	AND_LUT, OR_LUT, XOR_LUT = {}, {}, {}
 
 	for b1 = 0, 255 do
-		AND_LT[b1], OR_LT[b1], XOR_LT[b1] = {}, {}, {}
+		AND_LUT[b1], OR_LUT[b1], XOR_LUT[b1] = {}, {}, {}
 
 		for b2 = 0, 255 do
-			AND_LT[b1][b2] = band8(b1, b2)
-			OR_LT[b1][b2] = bor8(b1, b2)
-			XOR_LT[b1][b2] = bxor8(b1, b2)
+			AND_LUT[b1][b2] = band8(b1, b2)
+			OR_LUT[b1][b2] = bor8(b1, b2)
+			XOR_LUT[b1][b2] = bxor8(b1, b2)
 		end
 	end
+end
 
-	band = function(int1, int2, int3, ...)
-		local a1 = int1%256
-		int1 = (int1-a1)/256
-		local a2 = int1%256
-		int1 = (int1-a2)/256
-		local a3 = int1%256
-		int1 = (int1-a3)/256
-		local a4 = int1%256
+bit_lua.lut = function()
+	generate_lookup_tables()
 
-		local b1 = int2%256
-		int2 = (int2-b1)/256
-		local b2 = int2%256
-		int2 = (int2-b2)/256
-		local b3 = int2%256
-		int2 = (int2-b3)/256
-		local b4 = int2%256
+	bit_lua.band = lu_band
+	bit_lua.bor = lu_bor
+	bit_lua.bxor = lu_bxor
 
-		local ret = AND_LT[a1][b1] + AND_LT[a2][b2]*256 + AND_LT[a3][b3]*65536 + AND_LT[a4][b4]*16777216
-
-		if not int3 then
-			return ret
-		end
-
-		return band(ret, int3, ...)
-	end
-
-	bor = function(int1, int2, int3, ...)
-		local a1 = int1%256
-		int1 = (int1-a1)/256
-		local a2 = int1%256
-		int1 = (int1-a2)/256
-		local a3 = int1%256
-		int1 = (int1-a3)/256
-		local a4 = int1%256
-
-		local b1 = int2%256
-		int2 = (int2-b1)/256
-		local b2 = int2%256
-		int2 = (int2-b2)/256
-		local b3 = int2%256
-		int2 = (int2-b3)/256
-		local b4 = int2%256
-
-		local ret = OR_LT[a1][b1] + OR_LT[a2][b2]*256 + OR_LT[a3][b3]*65536 + OR_LT[a4][b4]*16777216
-
-		if not int3 then
-			return ret
-		end
-
-		return bor(ret, int3, ...)
-	end
-
-	bxor = function(int1, int2, int3, ...)
-		local a1 = int1%256
-		int1 = (int1-a1)/256
-		local a2 = int1%256
-		int1 = (int1-a2)/256
-		local a3 = int1%256
-		int1 = (int1-a3)/256
-		local a4 = int1%256
-
-		local b1 = int2%256
-		int2 = (int2-b1)/256
-		local b2 = int2%256
-		int2 = (int2-b2)/256
-		local b3 = int2%256
-		int2 = (int2-b3)/256
-		local b4 = int2%256
-
-		local ret = XOR_LT[a1][b1] + XOR_LT[a2][b2]*256 + XOR_LT[a3][b3]*65536 + XOR_LT[a4][b4]*16777216
-
-		if not int3 then
-			return ret
-		end
-
-		return bxor(ret, int3, ...)
-	end
+	bit_lua.band8 = lu_band8
+	bit_lua.bor8 = lu_bor8
+	bit_lua.bxor8 = lu_bxor8
 end
 
 
 bit_lua.bxor8 = bxor8
 
 
-bit_lua.bnot = function(int)
-	return 4294967295 - int
-end
-
-bit_lua.bnot8 = function(byte)
-	return 255 - byte
-end
-
-
-bit_lua.lshift = function(int, by)
-	return (int * POT_LUT[by])%POW32
-end
-
-bit_lua.rshift = function(int, by)
-	local shifted = int / POT_LUT[by]
-	return shifted - shifted%1
-end
-
-bit_lua.arshift = function(int, by)
-	local pf = POT_LUT[by]
-	local shifted = int / pf
-	return shifted - shifted%1 + ((int>=POW31 and (pf-1) * POT_LUT[32-by]) or 0)
-end
-
-
-bit_lua.lrotate = function(int, by)
-	local shifted = int / POT_LUT[32-by]
-	local fraction = shifted%1
-	return (shifted-fraction) + fraction*POW32
-end
-
-bit_lua.rrotate = function(int, by)
-	local shifted = int / POT_LUT[by]
-	local fraction = shifted%1
-	return (shifted - fraction) + fraction*POW32
-end
-
-
 return bit_lua