- Ornedan
- Nov 4, 2009
-
-
Cybernetic Crumb
|
I'm trying to figure out the fastest way to reverse way to reverse bits in a byte.
For context I'm trying to make some graphics stuff (dithering) go really fast and I finally hit a point where writing pixels through the whole BufferedImage, ColorModel, Raster, SampleModel and DataBuffer sequence dominates the per-pixel time. So it's time to write specialised code for common combinations that fucks directly with the image bits and chop off another 50% of time required.
Ways to reverse bits I've found and tried so far:
Java code:import java.util.Random;
import java.util.function.Function;
public class ReverseBitsInByteTest
{
public static byte reverseStdlib(byte b)
{
return (byte)(Integer.reverse(b & 0xFF) >> 24);
}
public static byte reverseSwaps(byte b)
{
int r = (b & 0b11110000) >> 4 | (b & 0b00001111) << 4;
r = (r & 0b11001100) >> 2 | (r & 0b00110011) << 2;
r = (r & 0b10101010) >> 1 | (r & 0b01010101) << 1;
return (byte)r;
}
private static final int[] lookupReverseNybble =
{
0x0,0x8,0x4,0xc,0x2,0xa,0x6,0xe,0x1,0x9,0x5,0xd,0x3,0xb,0x7,0xf,
};
public static byte reverseNybbles(byte b)
{
return (byte)((lookupReverseNybble[b & 0b00001111] << 4) | lookupReverseNybble[(b & 0b11110000) >> 4]);
}
private static final int[] lookupReverseByte =
{
0x0,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,
0xf0,0x8,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,
0x78,0xf8,0x4,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,
0xb4,0x74,0xf4,0xc,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,
0x3c,0xbc,0x7c,0xfc,0x2,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,
0xd2,0x32,0xb2,0x72,0xf2,0xa,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,
0x5a,0xda,0x3a,0xba,0x7a,0xfa,0x6,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,
0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,0xe,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,
0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,0x1,0x81,0x41,0xc1,0x21,0xa1,0x61,
0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,0x9,0x89,0x49,0xc9,0x29,0xa9,
0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,0x5,0x85,0x45,0xc5,0x25,
0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,0xd,0x8d,0x4d,0xcd,
0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,0x3,0x83,0x43,
0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,0xb,0x8b,
0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,0x7,
0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
0xf,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,
0xff,
};
public static byte reverseLookup(byte b)
{
return (byte)lookupReverseByte[b & 0xFF];
}
/** http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64BitsDiv */
public static byte reverseBithack3(byte b)
{
return (byte)((((b & 0xFF) * 0x0202020202l) & 0x010884422010l) % 1023);
}
/** http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits */
public static byte reverseBithack4(byte b)
{
return (byte)((((b & 0xFF) * 0x80200802l) & 0x0884422110l) * 0x0101010101l >> 32);
}
public static String toPaddedBinStr(int b, int pad)
{
String str = Integer.toBinaryString(b);
while(str.length() < pad)
str = '0' + str;
return str;
}
public static String toPaddedBinStr(long b, int pad)
{
String str = Long.toBinaryString(b);
while(str.length() < pad)
str = '0' + str;
return str;
}
public static void check(String name,Function<Byte,Byte> func)
{
for(int b = 0; b <= 0xFF; b++)
{
int rev = func.apply((byte)b) & 0xFF;
String binStr = toPaddedBinStr(b,8);
String revStr = toPaddedBinStr(rev,8);
boolean correct =
new StringBuilder(binStr).reverse().toString().equals(revStr);
if( !correct)
System.out.printf("%s failed: %s => %s\n",name,binStr,revStr);
}
}
public static void main(String[] args) throws Throwable
{
// Correctness checks
check("reverseStdlib",ReverseBitsInByteTest::reverseStdlib);
check("reverseSwaps",ReverseBitsInByteTest::reverseSwaps);
check("reverseNybbles",ReverseBitsInByteTest::reverseNybbles);
check("reverseLookup",ReverseBitsInByteTest::reverseLookup);
check("reverseBithack3",ReverseBitsInByteTest::reverseBithack3);
check("reverseBithack4",ReverseBitsInByteTest::reverseBithack4);
// Speed testing
byte[] data = new byte[1000000];
new Random().nextBytes(data);
long beginStdlib = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseStdlib(data[i]);
long endStdlib = System.currentTimeMillis();
long beginSwaps = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseSwaps(data[i]);
long endSwaps = System.currentTimeMillis();
long beginNybbles = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseNybbles(data[i]);
long endNybbles = System.currentTimeMillis();
long beginLookup = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseLookup(data[i]);
long endLookup = System.currentTimeMillis();
long beginBithack3 = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseBithack3(data[i]);
long endBithack3 = System.currentTimeMillis();
long beginBithack4 = System.currentTimeMillis();
for(int repeats = 0; repeats < 1000; repeats++)
for(int i = 0; i < data.length; i++)
data[i] = reverseBithack4(data[i]);
long endBithack4 = System.currentTimeMillis();
System.out.printf("Stdlib: %d msec\n",endStdlib - beginStdlib);
System.out.printf("Swaps: %d msec\n",endSwaps - beginSwaps);
System.out.printf("Nybbles: %d msec\n",endNybbles - beginNybbles);
System.out.printf("Lookup: %d msec\n",endLookup - beginLookup);
System.out.printf("Bithack3: %d msec\n",endBithack3 - beginBithack3);
System.out.printf("Bithack4: %d msec\n",endBithack4 - beginBithack4);
}
}
The weird thing I'm running into is that I get completely different results depending on whether that test class is compiled with javac on command line or Eclipse:
pre:javac | Eclipse
--------------------|--------------------
Stdlib: 1736 msec | Stdlib: 1661 msec
Swaps: 3528 msec | Swaps: 1531 msec
Nybbles: 2829 msec | Nybbles: 780 msec
Lookup: 2366 msec | Lookup: 459 msec
Bithack3: 3513 msec | Bithack3: 1087 msec
Bithack4: 2810 msec | Bithack4: 586 msec
Are these some optimisation flags Eclipse enables by default that could cause the difference?
e: oh wtf, whichever is the first one I measure time for is as fast in the javac-compiled bytecode as in Eclipse-compiled. So the javac results change if I reorder the speed tests.
Ornedan fucked around with this message at 15:03 on Jul 9, 2023
|