H7ctf 2025
applemalmal
This was an interesting malware-reversing-inspired challenge that I managed to tackle pretty easily with emulation. I got through all of the reversing challenges of this CTF, and this one had the fewest solves.
The challenge provided AppleMeowMeow.exe, and files checksum.txt and pumpkin.html.enc.
Analyzing the executable arguments
First, the unpacker walks the section headers of its own exe, looping through each section header to check for a tExt section.
// ...
section_header_i = 0;
if ( !num_sections )
{
continue:
CloseHandle(fp);
return 2;
}
while ( 1 )
{
if ( !ReadFile(fp, &section_name, 0x28u, &NumberOfBytesRead, 0) || NumberOfBytesRead != 40 )
goto LABEL_54;
LOBYTE(scratch[2]) = 0;
*(_QWORD *)scratch = section_name;
if ( !j_strncmp((const char *)scratch, "tExt", 4u) )
break;
if ( ++section_header_i >= num_sections )
goto continue;
}
*(__m128i *)scratch = v47;
if ( (unsigned int)_mm_cvtsi128_si32(v47) <= 0x10 )
{
CloseHandle(fp);
return 3;
}
// ...
It then reads this section into a buffer, and uses it to build some arguments that will be used later.
IDA likes to inline SSE intrinsics, so it was kind of hard to annotate this part, sorry.
// v47 starts at offset 16 in section table header.
if ( (unsigned int)_mm_cvtsi128_si32(v47) <= 16 ) // if SizeOfRawData <= 16
{
CloseHandle(fp);
return 3;
}
// (lowers to psrldq) shift right by 4 bytes -> offset 20 (PointerToRawData), zero-extended
if ( SetFilePointer(fp, _mm_cvtsi128_si32(_mm_srli_si128(v47, 4)), 0, 0) == -1 && GetLastError() )// PointerToRawData
goto err;
v10 = scratch[0]; // SizeOfRawData
buf = (__int128 *)j_j_j__malloc_base(scratch[0]);
v13 = v12;
v36 = v12;
if ( !v12 )
goto err;
if ( !ReadFile(fp, buf, v10, &NumberOfBytesRead, 0) || NumberOfBytesRead != v10 )
{
j_j_j__free_base(v13);
err:
CloseHandle(fp);
return 1;
}
CloseHandle(fp);
The first 16 bytes (a single int128_t) are a seed or key, and the rest of the section data is the ciphertext.
__int128_t key = *bufp;
cipher_len = nb - 16;
*(_QWORD *)scratch = bufp + 1; // (bufp is an int128*)
outbuf = j_j_j__malloc_base(cipther_len); // will write decrypted cypher here later
// ...
Building each stub
Now, we can approach the first stub. In very nostalgic fashion, it is built straight from a table in the file.
_OWORD *stub1_buf = VirtualAlloc(0, 0xD7u, 0x3000u, 4u);
current_stub = stub1_buf;
lpAddress = stub1_buf;
if ( !stub1_buf )
goto alloc_err;
// build the stub out of 16B tiles
*stub1_buf = loc_14009C0F0;
stub1_buf[1] = loc_14009C100;
stub1_buf[2] = *(_OWORD *)((char *)&loc_14009C10F + 1);
stub1_buf[3] = *(_OWORD *)((char *)&loc_14009C11E + 2);
stub1_buf[4] = loc_14009C130;
stub1_buf[5] = *(_OWORD *)((char *)&loc_14009C13F + 1);
stub1_buf[6] = loc_14009C150;
stub1_buf[7] = *(_OWORD *)((char *)&loc_14009C15E + 2);
stub1_buf[8] = loc_14009C170;
stub1_buf[9] = loc_14009C180;
stub1_buf[10] = *(_OWORD *)((char *)&loc_14009C18E + 2);
stub1_buf[11] = *(_OWORD *)((char *)&loc_14009C19D + 3);
stub1_buf[12] = *(_OWORD *)((char *)&loc_14009C1AE + 2);
// fixups
*((_DWORD *)stub1_buf + 52) = loc_14009C1C0;
*((_WORD *)stub1_buf + 106) = *(_WORD *)((char *)&loc_14009C1C1 + 3);
*((_BYTE *)stub1_buf + 214) = locret_14009C1C6;
// call the stub
goto LABEL_35;
((void (__fastcall *)(_BYTE *, __int128 *, __int64))current_stub)(buff, &key, 16); // buf is a 256 byte array on stack.
VirtualFree(current_stub, 0, 0x8000u);
v22 = VirtualAlloc(0, 0xE6u, 0x3000u, 4u);
As you can see, it builds the stub out of 16 byte tiles from its memory (+ a couple of fixups at the end). The stub is passed a 256 byte stack buffer to use for output, and the 16 byte key we derived before.
There are a couple of ways to tackle this; I opted to use the RVAs from each section header to set up va mappings manually via unicorn, and then built the buffer by reading the tiles just like the loader itself.
STUB1_TILES = [
(0x9C0F0, 16), (0x9C100, 16),
(0x9C10F+1,16), (0x9C11E+2,16),
(0x9C130, 16), (0x9C13F+1,16),
(0x9C150, 16), (0x9C15E+2,16),
(0x9C170, 16), (0x9C180, 16),
(0x9C18E+2,16), (0x9C19D+3,16),
(0x9C1AE+2,16),
]
# ...
# map image
base = pe.OPTIONAL_HEADER.ImageBase
total = pc(pe.OPTIONAL_HEADER.SizeOfImage)
a = pf(base); b = pc(base + total); p = a
while p < b:
try: uc.mem_map(p, PAGE, UC_PROT_READ|UC_PROT_WRITE|UC_PROT_EXEC)
except UcError: pass
p += PAGE
for s in pe.sections:
va = base + s.VirtualAddress # base + rva
raw = s.get_data()
vsize = max(len(raw), int(s.Misc_VirtualSize or 0))
uc.mem_write(va, raw)
if vsize > len(raw):
uc.mem_write(va+len(raw), b"\0"*(vsize-len(raw)))
# rebuild stub1
buf = bytearray(STUB1_SIZE)
off = 0
for rva, sz in STUB1_TILES:
buf[off:off+sz] = uc.mem_read(base + rva, sz); off += sz
buf[52*4:52*4+4] = uc.mem_read(base + 0x9C1C0, 4)
buf[106*2:106*2+2] = uc.mem_read(base + 0x9C1C1 + 3, 2)
buf[214] = uc.mem_read(base + 0x9C1C6, 1)[0]
stub1 = bytes(buf)
Stub 2
This was the exact same process as with stub 1
VirtualFree(current_stub, 0, 0x8000u); // free the rwx pages we mapped for stub1
stub2_final = VirtualAlloc(0, 0xE6u, 0x3000u, 4u);
current_stub = stub2_final;
*(_QWORD *)scratch = stub2_final;
if ( !stub2_final )
goto alloc_err;
*stub2_final = loc_14009C000;
stub2_final[1] = *(_OWORD *)(0x140000000LL + 638992);
stub2_final[2] = *(_OWORD *)(0x140000000LL + 639008);
stub2_final[3] = *(_OWORD *)(0x140000000LL + 639024);
stub2_final[4] = *(_OWORD *)(0x140000000LL + 639040);
stub2_final[5] = *(_OWORD *)(0x140000000LL + 639056);
stub2_final[6] = *(_OWORD *)(0x140000000LL + 639072);
stub2_final[7] = *(_OWORD *)(0x140000000LL + 639088);
stub2_final[8] = *(_OWORD *)(0x140000000LL + 639104);
stub2_final[9] = *(_OWORD *)(0x140000000LL + 639120);
stub2_final[10] = *(_OWORD *)(0x140000000LL + 639136);
stub2_final[11] = *(_OWORD *)(0x140000000LL + 639152);
stub2_final[12] = *(_OWORD *)(0x140000000LL + 639168);
stub2_final[13] = *(_OWORD *)(0x140000000LL + 639184);
*((_DWORD *)stub2_final + 56) = *(_DWORD *)(0x140000000LL + 639200);
*((_WORD *)stub2_final + 114) = *(_WORD *)(0x140000000LL + 639204);
v23 = GetCurrentProcess();
FlushInstructionCache(v23, current_stub, 0xE6u);
if ( VirtualProtect(current_stub, 0xE6u, 0x20u, &SizeOfOptionalHeader) )
{
((void (__fastcall *)(_BYTE *, __int128 *, void *, size_t))current_stub)(buff, bufp + 1, outbuf, cipher_len);
// ...
Clearly, we can use the same method as before to build this stub, all we need to do is change the addresses and fixups.
Note that this time, the stub is passed the 256 byte output of stub1 (buff), the ciphertext from tExt ((int128_t *)bufp + 1, i.e. the data just past the 16 byte key), the output buffer allocated before (same length as the ciphertext), and the length of the ciphertext. In other words, it uses the 256 byte buffer produced by stub1 to decode the ciphertext into the output buffer.
Now, let's look at what the rest of the loader does.
VirtualFree(current_stub, 0, 0x8000u);
if ( GetTempPathW(0x104u, tmp_path) )
{
TickCount = GetTickCount();
CurrentProcessId = GetCurrentProcessId();
wsprintfW(payload_exe_name, L"meo_%lu_%lu.exe", CurrentProcessId, TickCount);
FileName[0] = 0;
sub_14000359E((__int64)FileName, (__int64)tmp_path, 259);// concat path + filename
v26 = sub_1400012EE((__int64)FileName); // basename(abspath)
if ( v26 && FileName[v26 - 1] != '\\' && (unsigned __int64)(v26 + 1) < 0x104 )
*(_DWORD *)&FileName[v26] = '\\';
v27 = sub_1400012EE((__int64)FileName);
sub_14000359E((__int64)FileName, (__int64)payload_exe_name, 259 - v27);
payload_exe = CreateFileW(FileName, 0x40000000u, 0, 0, 2u, 0x80u, 0);
v29 = payload_exe;
if ( payload_exe != (HANDLE)-1LL )
{
if ( WriteFile(payload_exe, outbuf, cipher_len, &NumberOfBytesWritten, 0) && NumberOfBytesWritten == cipher_len )
{
CloseHandle(v29);
wsprintfW(CommandLine, L"cmd.exe /c \"%s\"", FileName);
memset(&StartupInfo.cb + 1, 0, 100);
StartupInfo.cb = 104;
memset(&ProcessInformation, 0, sizeof(ProcessInformation));
if ( CreateProcessW(0, CommandLine, 0, 0, 0, 0, 0, 0, &StartupInfo, &ProcessInformation)
|| CreateProcessW(FileName, 0, 0, 0, 0, 0, 0, 0, &StartupInfo, &ProcessInformation) )
{
WaitForSingleObject(ProcessInformation.hProcess, 0xFFFFFFFF);
ExitCode = 0;
GetExitCodeProcess(ProcessInformation.hProcess, &ExitCode);
CloseHandle(ProcessInformation.hThread);
CloseHandle(ProcessInformation.hProcess);
sub_140002586("DONE !\n");
DeleteFileW(FileName);
j_j_j__free_base(outbuf);
j_j_j__free_base(bufp);
return 0;
}
}
First, an exe is created in a tmp directory, and the output buffer of stub2 is directly written to this file. Then, the executable is run. After it has exited, the loader also exits.
Emulating
Luckily, each stub seems to be designed to be relocatable, so we don’t need to set up any super specific stack or heap structure.
I just mapped a bunch of scratch pages to use for the arguments (making the regions huge avoids emulator errors, lol; if you have a slow PC you can make them way smaller), and set up a fake stack so we can gracefully stop emulating when the stub is finished. Honestly, now that I'm looking at this, it probably would have been better to just map a page or two for each argument.
STACK_BASE = base + 0x02000000
uc.mem_map(p)
for a,sz in [(HEAP_BASE,0x2000000), (STACK_BASE,0x00200000)]:
p=a
p1=a + sz
while p<p1:
uc.mem_map(p, PAGE, UC_PROT_READ|UC_PROT_WRITE|UC_PROT_EXEC)
p+=PAGE_SIZE
# fake saved rip we will put on stack, it still needs a page to avoid segfault on fetch
# We will emu.start(..., until=RET_TRAMP) so we can get the emu to stop after emulating each stub
RET_TRAMP = 0x70000000
uc.mem_map(pf(RET_TRAMP), PAGE, UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC)
uc.mem_write(RET_TRAMP, b"\xC3")
HEAP_BASE = base + 0x04000000
STUB1_ADDR = HEAP_BASE + 0x1000
STUB2_ADDR = HEAP_BASE + 0x3000
BUFF_PTR = HEAP_BASE + 0x8000
SEED_PTR = HEAP_BASE + 0x9000
IN_PTR = HEAP_BASE + 0xB000
OUT_PTR = HEAP_BASE + 0x20000
uc.mem_write(STUB1_ADDR, stub1)
uc.mem_write(STUB2_ADDR, stub2)
uc.mem_write(SEED_PTR, seed)
uc.mem_write(IN_PTR, cipher)
uc.mem_write(OUT_PTR, b"\x00"*len(cipher))
rsp = STACK_BASE + STACK_SIZE - 0x1000
uc.reg_write(UC_X86_REG_RSP, rsp-8)
uc.mem_write(rsp-8, RET.to_bytes(8,"little"))
After this setup, we can just pass the arguments according to the __fastcall calling convention (on x64 Windows: RCX, RDX, R8, R9) and call the stubs.
uc.reg_write(UC_X86_REG_RCX, BUFF_PTR) # buff
uc.reg_write(UC_X86_REG_RDX, SEED_PTR) # in
uc.reg_write(UC_X86_REG_R8, 16) # len
uc.emu_start(STUB1_ADDR, until=RET_TRAMP)
# setup stack again for stub2
rsp = STACK_BASE + STACK_SIZE - 0x1000
uc.reg_write(UC_X86_REG_RSP, rsp-8)
uc.mem_write(rsp-8, RET.to_bytes(8,"little"))
uc.reg_write(UC_X86_REG_RCX, BUFF_PTR) # buff
uc.reg_write(UC_X86_REG_RDX, IN_PTR) # cypher
uc.reg_write(UC_X86_REG_R8, OUT_PTR) # out
uc.reg_write(UC_X86_REG_R9, len(cipher)) # len
uc.emu_start(STUB2_ADDR, RET_TRAMP)
# dump the exe to disk
stage2_exe = uc.mem_read(OUT_PTR, len(cipher))
out = exe.with_suffix(".unpacked.exe")
out.write_bytes(stage2_exe)
After running our script, we have the unpacked second stage!
Analyzing the second stage
Going by the pdb, this binary was compiled with the name Ransom.
There is a lot of stuff in this binary! It tries to do some pretty standard anti-debug stuff like setting ThreadHideFromDebugger, and it will try to shut down your computer if you input the wrong checksum.
int64_t main(int32_t argc, void* char **argv)
void s
__builtin_memset(&s, c: 0xcccccccc, n: 0x518)
int64_t rax_1 = __security_cookie ^ &s
j___CheckForDebuggerJustMyCode(&data_1400270a4)
void expected_hash
__builtin_memset(s: &expected_hash, c: 0, n: 0x20)
void self_sha256
__builtin_memset(s: &self_sha256, c: 0, n: 0x20)
int64_t pNtSetInformationThread = GetProcAddress(hModule: GetModuleHandleA(lpModuleName: "ntdll"), lpProcName: "NtSetInformationThread")
if (pNtSetInformationThread != 0)
pNtSetInformationThread(GetCurrentThread(), ThreadHideFromDebugger, 0, 0)
void lpFilename
uint32_t rax_4 = GetModuleFileNameA(hModule: nullptr, &lpFilename, nSize: 0x104)
int64_t result
if (rax_4 == 0 || zx.q(rax_4) u>= 0x104)
fail();
else if (zx.d(sha256(&lpFilename, &self_sha256)) != 0)
PSTR checksum_file
if (argc s< 2)
checksum_file = "checksum.txt"
else
checksum_file = *(char **argv + 8)
if (zx.d(load_hash(checksum_file, &expected_hash)) != 0)
fail();
if (compare_digests(&self_sha256, &expected_hash, 0x20) != 0)
// ...
MessageBoxA(hWnd: nullptr, &lpText, lpCaption: "G00d bye HACKER !", uType: MB_ICONASTERISK)
// ..
// LOL
Sleep(dwMilliseconds: 0x1f40)
antidebug_shutdown_computer()
ExitWindowsEx(uFlags: EWX_FORCEIFHUNG, dwReason: SHTDN_REASON_FLAG_PLANNED)
void var_758
sub_1400121e0(&var_758, 0x100)
result = j_sub_140011ee0(&var_758)
void var_788
j__RTC_CheckStackVars(&var_788, &data_14001db70)
j___security_check_cookie(rax_1 ^ &s)
return result
This is obviously the execution path we are interested in:
sub_1400121e0(&var_758, 0x100)
result = j_sub_140011ee0(&var_758)
This first function is pretty small and easy to analyze.
void* sub_1400121e0(void* arg1, int64_t arg2)
j___CheckForDebuggerJustMyCode(&data_1400270a4)
void* t = GetProcAddress(hModule: GetModuleHandleA(lpModuleName: "ntdll"), lpProcName: "NtSetInformationThread")
if (t != 0)
t = t(GetCurrentThread(), ThreadHideFromDebugger, 0, 0)
if (arg2 != 0)
int64_t i
for (i = 0; i u< 0x25; i += 1)
if (i + 1 u>= arg2)
break
*(arg1 + i) = *(&data_14001cbb0 + i) ^ *(&data_14001cbd8 + modu.dp.q(0:i, 4))
t = arg1 + i
*t = 0
return t
It does the same anti-debug stuff as usual, then uses two tables built into the file to write something to the buffer we pass it.
data_14001cbb0:
9d 97 e2 ba ad c8 cc 9c 82 c5 cb 96 b0 c5 e2 a9 ................
bf db d1 9d b7 d9 db 9c 82 fd cb 82 ae c6 d7 81 ................
f0 c5 ca 82 b2 00 00 00 ........
...
data_14001cbd8:
de ad be ef 00 00 00 00 ........
This is simple XOR decoding. We can decode it to obtain the absolute path to the html file.
In [1]:
...: cbb0 = (
...: b"\x9d\x97\xe2\xba\xad\xc8\xcc\x9c\x82\xc5\xcb\x96\xb0\xc5\xe2\xa9"
...: b"\xbf\xdb\xd1\x9d\xb7\xd9\xdb\x9c\x82\xfd\xcb\x82\xae\xc6\xd7\x81"
...: b"\xf0\xc5\xca\x82\xb2\x00\x00\x00"
...: )
...: key = b"\xde\xad\xbe\xef"
...:
...: out = bytearray()
...: for i, b in enumerate(cbb0):
...: if b == 0x00:
...: break
...: out.append(b ^ key[i % 4])
...:
...: decoded = bytes(out)
...: print(decoded.decode("ascii", "replace"))
"C:\Users\huynh\Favorites\Pumpkin.html"
This path is then passed to the second function:
int64_t sub_140011ee0(char* abspath)
void s
__builtin_memset(&s, c: 0xcccccccc, n: 0x348)
int64_t rax_1 = __security_cookie ^ &s
j___CheckForDebuggerJustMyCode(&data_1400270a4)
uint8_t* lpMem = nullptr
uint32_t var_444 = 0
uint8_t* lpMem_1 = nullptr
uint32_t var_404 = 0
int64_t rax_3 = GetProcAddress(hModule: GetModuleHandleA(lpModuleName: "ntdll"), lpProcName: "NtSetInformationThread")
if (rax_3 != 0)
rax_3(GetCurrentThread(), 0x11, 0, 0)
void var_4d0
int64_t result
void var_4a0
if (zx.d(sub_140012490(&var_4d0)) != 0)
if (zx.d(sub_140013500(abspath, &var_4a0)) != 0)
if (zx.d(sub_140012da0(abspath, &lpMem, &var_444)) != 0)
if (zx.d(sub_140011930(&var_4a0, &var_4d0, lpMem, var_444, &lpMem_1, &var_404)) != 0)
HeapFree(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, lpMem)
void var_3d8
j_sub_140014430(&var_3d8, 0x208, "%s.enc", abspath)
if (zx.d(sub_140013900(&var_3d8, lpMem_1, var_404)) != 0)
HeapFree(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, lpMem: lpMem_1)
result = 0
else
HeapFree(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, lpMem: lpMem_1)
result = 0xfffffffb
else
HeapFree(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, lpMem)
result = 0xfffffffc
else
MessageBoxA(hWnd: nullptr, lpText: &data_14001dee0, lpCaption: "ALERT", uType: MB_ICONASTERISK)
Sleep(dwMilliseconds: 0xbb8)
result = 0xfffffffd
else
result = 0xfffffffe
else
result = 0xffffffff
void var_508
j__RTC_CheckStackVars(&var_508, &data_14001d540)
j___security_check_cookie(rax_1 ^ &s)
return result
At this point it's obvious that this binary is what was used to produce Pumpkin.html.enc. This function is pretty ugly, so I just started looking at each condition one by one.
int32_t* sub_140012490(int32_t* buf)
void s
__builtin_memset(&s, c: 0xcccccccc, n: 0x138)
int64_t rax_1 = __security_cookie ^ &s
j___CheckForDebuggerJustMyCode(&data_1400270a4)
HMODULE hModule = GetModuleHandleA(lpModuleName: nullptr)
int32_t* result
if (hModule == 0)
struct _iobuf* rax_2
int64_t r8_1
rax_2, r8_1 = __acrt_iob_func(id: 2)
j_sub_140013de0(rax_2, "[!] GetModuleHandle(NULL) failed…", r8_1)
result.b = 0
else if (zx.d(hModule->unused.w) == 'MZ')
// e_lfanew
struct _IMAGE_NT_HEADERS64* nt_header = hModule + sx.q(hModule->__offset(0x3c).d)
if (*nt_header == 'PE')
IMAGE_SECTION_HEADER* section_table = &nt_header->OptionalHeader + zx.q(nt_header->FileHeader.SizeOfOptionalHeader)
int16_t i = 0
while (true)
if (zx.d(i) s>= zx.d(nt_header->FileHeader.NumberOfSections))
result.b = 0
break
void section_name
__builtin_memset(s: &section_name, c: 0, n: 9)
memcpy(&section_name, &section_table[zx.q(i)], 8)
if (_stricmp(lhs: §ion_name, rhs: "vector") == 0)
int32_t VirtualAddress = section_table[zx.q(i)].VirtualAddress
int32_t var_24_1
if (section_table[zx.q(i)].Misc == 0)
var_24_1 = section_table[zx.q(i)].SizeOfRawData
else
var_24_1 = section_table[zx.q(i)].Misc
if (VirtualAddress == 0 || var_24_1 u< 0x10)
result.b = 0
else
memcpy(buf, hModule + zx.q(VirtualAddress), 0x10)
result.b = 1
break
i += 1
else
result.b = 0
else
result.b = 0
void var_228
j__RTC_CheckStackVars(&var_228, &data_14001cf90)
j___security_check_cookie(rax_1 ^ &s)
return result
This function walks the section headers of the current module, searching for a section named "vector". After finding this section, it copies the first 16 bytes directly from the mapped section into the buffer.
Because we have the binary, we can grab these bytes straight from the file:
vector section started {0x140077000-0x140077200}
03 38 fa f5 d2 67 62 17 15 0e 96 fe e0 41 68 a0
Now, onto the next function. This one takes the abspath to pumpkin.html from before, as well as another buffer.
Every function name here is stripped, but they just tail call unstripped functions, so we can rename them easily to make things a lot more readable. After doing this, the 2nd function is pretty simple:
//...
NTSTATUS rax_2 = BCryptOpenAlgorithmProvider(phAlgorithm: &sha256alg, pszAlgId: u"SHA256", pszImplementation: nullptr, dwFlags: 0)
if (rax_2 == STATUS_SUCCESS)
NTSTATUS rax_3 = BCryptGetProperty(hObject: sha256alg, pszProperty: u"ObjectLength", pbOutput: &hashlen, cbOutput: 4, &pcbResult, dwFlags: 0)
if (rax_3 == STATUS_SUCCESS)
var_120 = HeapAlloc(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, dwBytes: zx.q(hashlen))
if (var_120 != 0)
NTSTATUS rax_8 = BCryptCreateHash(hAlgorithm: sha256alg, phHash: &sha256hash, pbHashObject: var_120, cbHashObject: hashlen, pbSecret: nullptr, cbSecret: 0, dwFlags: 0)
if (rax_8 == STATUS_SUCCESS)
uint32_t cbInput = strlen(abspath)
NTSTATUS rax_9 = BCryptHashData(hHash: sha256hash, pbInput: abspath, cbInput, dwFlags: 0)
if (rax_9 == STATUS_SUCCESS)
NTSTATUS rax_10 = BCryptFinishHash(hHash: sha256hash, pbOutput: out, cbOutput: 0x20, dwFlags: 0)
if (rax_10 == STATUS_SUCCESS)
var_104 = 1
// ...
It is just returning the basic sha256 digest of abspath ("C:\Users\huynh\Favorites\Pumpkin.html"). We can also trivially compute this manually.
The next function also takes abspath, and seems to output a buffer and an integer. After decompiling it, it is clear that it is reading the contents of the file into a heap allocated buffer, and returns the buffer and length.
The fourth function is passed the digest from before, the first 16 bytes of the vector section, the plaintext file content and len, and outputs a buffer and length.
This function is what does the actual encryption.
The function is very long, but if we apply the same tail-call resolving method as before and walk through each bcrypt call, we can figure it out.
// get a handle to aes algorithm
BCryptOpenAlgorithmProvider(phAlgorithm: &hAes, pszAlgId: &a_e_s, pszImplementation: nullptr, dwFlags: 0)
// use cbc chaining mode
BCryptSetProperty(hObject: hAes, pszProperty: u"ChainingMode", pbInput: u"ChainingModeCBC", cbInput: 0x20, dwFlags: 0)
// allocate buffer with key object size
BCryptGetProperty(hObject: hAes, pszProperty: u"ObjectLength", pbOutput: &objectlen, ...)
keyObj = HeapAlloc(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, dwBytes: zx.q(objectlen))
// generate a symmetric key using our digest as the secret
digest_.q = _digest
BCryptGenerateSymmetricKey(hAlgorithm: hAes, phKey: &hKey, pbKeyObject: keyObj, cbKeyObject: objectlen, pbSecret: digest_, cbSecret: 0x20, dwFlags: 0)
After this, there are two BCryptEncrypt calls.
void vec_copy
memcpy(&vec_copy, vector_section, 0x10)
uint32_t pbOutput
pbOutput.q = 0
pIV.q = &vec_copy
BCryptEncrypt(hKey, pbInput: plaintext, cbInput: plaintext_len, pPaddingInfo: nullptr, pbIV: pIV, cbIV: 0x10, pbOutput, cbOutput: 0, pcbResult: &cb1Res, dwFlags: BCRYPT_BLOCK_PADDING)
This first pass makes a copy of the 16 vector bytes from before and uses them as the IV. After this call, cb1Res will hold the rounded ciphertext size.
After this, the returned length is used to allocate a buffer and the same BCryptEncrypt call is made again. The IV needs to be copied again because BCryptEncrypt may mutate the buffer.
outbuf = HeapAlloc(hHeap: GetProcessHeap(), dwFlags: HEAP_NONE, dwBytes: zx.q(cb1Res))
if (outbuf != 0)
memcpy(&vec_copy, vector_section, 0x10)
pbOutput.q = outbuf
pIV.q = &vec_copy
NTSTATUS rax_20 = BCryptEncrypt(hKey, pbInput: plaintext, cbInput: plaintext_len, pPaddingInfo: nullptr, pbIV: pIV, cbIV: 0x10, pbOutput, cbOutput: cb1Res, &pcbResult, dwFlags: BCRYPT_BLOCK_PADDING)
*bufp = outbuf
*lenp = pcbResult
// ...
return;
In summary, we can say that the file was encrypted with AES 256 (32 byte key, the sha256 of the path string), with mode = ChainingModeCBC, padding = BCRYPT_BLOCK_PADDING (which is PKCS#7 apparently).
We also know the parameters IV = 0338faf5d2676217150e96fee04168a0, and key = sha256("C:\Users\huynh\Favorites\Pumpkin.html") = 35277fe93124db1d8f32ab9ed06a6925ae5dc84e82c8d21b3397310c89f608c5
Based on this, we can decrypt pumpkin.html.enc and obtain Pumpkin.html, which is a pretty looking website with animated bats that contains our flag!