Created
August 20, 2020 14:54
-
-
Save djspiewak/2631fbcf25cedfbd98bd2252db3028b3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// we really want to vectorize something like this: | |
// Naive per-byte scan: mask every input byte against the quote character
// and accumulate the outcome in a same-sized result buffer.
val bytes: Array[Byte] = ??? // placeholder for the input buffer
val results: Array[Byte] = new Array[Byte](bytes.length)
var i = 0
while (i < bytes.length) { // assume bytes.length % 4 == 0
  // `&` and `|` on a Byte produce an Int, so the value has to be narrowed
  // back explicitly before it can be stored in an Array[Byte]
  results(i) = (results(i) | (bytes(i) & '"')).toByte
  i += 1 // advance the cursor; without this the loop never terminates
}
// the idea is to look for quote characters | |
// | |
// anyway, this *almost* works, but bitwise operations
// are not defined on bytes (ironically), only on int/long,
// so this triggers coercions, which mess up the
// auto-vectorization
// what we need to do is get it into Array[Int] form...
// ...without copying it
// the problem here is that Array[Byte] != Array[Int], which
// the JVM will happily inform us of if we attempt to cast
// demonstration only: a direct cast between the two array types is rejected at runtime
bytes.asInstanceOf[Array[Int]] // => ClassCastException
// so we have to trick it! 😈 | |
import sun.misc.Unsafe // oooooh boy here we go! | |
// Obtain the Unsafe singleton reflectively: it lives in the private static
// field "theUnsafe", so the field has to be made accessible before reading it.
val unsafe: Unsafe = {
  val singletonField = classOf[Unsafe].getDeclaredField("theUnsafe")
  singletonField.setAccessible(true)
  // static field, hence the null receiver
  singletonField.get(null).asInstanceOf[Unsafe]
}
// we can use Unsafe to dig into the Array representation. | |
// spoiler alert: the class tag is stored in the header, | |
// meaning that we can modify that tag without touching | |
// anything else, and trick the JVM into thinking that | |
// bytes is *actually* an Array[Int] | |
// Array[Byte](1, 2, 3, 4) looks like this in memory: | |
// | |
// 0| 09 00 00 00 | |
// 4| 00 00 00 00 | |
// 8| f5 00 00 f8 <-- this is the class tag! | |
// 12| 04 00 00 00 <-- length | |
// 16| 01 02 03 04 <-- data | |
// the class tag sits right after the leading header word, whose length is Unsafe.ADDRESS_SIZE (8 bytes in the dump above)
// Offset of the class tag inside an array object.
// NOTE(review): assumes the tag is a 4-byte word located exactly
// Unsafe.ADDRESS_SIZE bytes into the object — confirm for the target JVM.
val TagOffset: Long = Unsafe.ADDRESS_SIZE.toLong
// remember the genuine tag of `bytes` so it can be put back later
val BytesClassTag: Int = unsafe.getInt(bytes, TagOffset)
// read the tag off a throwaway one-element int array
val IntsClassTag: Int = unsafe.getInt(new Array[Int](1), TagOffset)
// evil mode: engage! overwrite the tag in place; length and data are untouched
unsafe.putInt(bytes, TagOffset, IntsClassTag)
// the cast now passes the runtime check — holy moly it works!
val ints: Array[Int] = bytes.asInstanceOf[Array[Int]]
// so that's a static_cast, for those of you keeping score at home | |
// no data has been copied | |
// note though that byte arrays are *packed*, so the meaning of | |
// ints.length is, uh, different and wrong. But fortunately, wrong | |
// in a good way. Imagine we had somehow saved off bytes before | |
// performing our witchcraft: | |
// Each comparison is fully parenthesised because `==` binds tighter than `&`
// in Scala; unparenthesised, the line would parse as `x & (0xFF == b)`.
// The byte side is masked as well: Byte is signed, so values 0x80..0xFF
// would otherwise be negative and never equal the extracted lane.
// NOTE(review): this lane order assumes a little-endian platform — confirm.
((ints(0) >> 0) & 0xFF) == (originalBytes(0) & 0xFF)
((ints(0) >> 8) & 0xFF) == (originalBytes(1) & 0xFF)
((ints(0) >> 16) & 0xFF) == (originalBytes(2) & 0xFF)
((ints(0) >> 24) & 0xFF) == (originalBytes(3) & 0xFF)
// in other words, each set of four bytes is packed into one int | |
// One result slot per packed int, i.e. per group of four input bytes.
val results: Array[Int] = new Array[Int](bytes.length / 4)
// the quote character replicated into all four byte lanes of an int
val sigil = '"' | ('"' << 8) | ('"' << 16) | ('"' << 24)
var i = 0
try {
  // only the class tag was rewritten, so ints.length still reports the
  // byte count; dividing by four gives the number of packed ints
  while (i < ints.length / 4) {
    results(i) |= ints(i) & sigil
    i += 1 // advance the cursor; without this the loop never terminates
  }
  // this vectorizes. get rekt, managed runtime
} finally {
  // always put the original class tag back, even if the loop fails,
  // so the array is never left with a forged header
  unsafe.putInt(bytes, TagOffset, BytesClassTag)
}
// ...whistles innocently
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment