#include #include #include #include #include #include #include #include #include #include #include #define PAGE_SIZE 4 * 1024 * 1024 #define CODE_START 0x1000000 #define CODE_SIZE PAGE_SIZE #define STACK_SIZE PAGE_SIZE #define STACK_START CODE_START + CODE_SIZE + STACK_SIZE #define EIP_START CODE_ADDRESS #define ESP_START STACK_START #define EBP_START ESP_START static void dump_regs( uc_engine *uc ) { uint32_t eip; uint32_t esp; uint32_t ebp; uint32_t eax; uint32_t ebx; uint32_t ecx; uint32_t edx; uint32_t esi; uint32_t edi; uc_reg_read( uc, UC_X86_REG_EIP, &eip ); uc_reg_read( uc, UC_X86_REG_ESP, &esp ); uc_reg_read( uc, UC_X86_REG_EBP, &ebp ); uc_reg_read( uc, UC_X86_REG_EAX, &eax ); uc_reg_read( uc, UC_X86_REG_EBX, &ebx ); uc_reg_read( uc, UC_X86_REG_ECX, &ecx ); uc_reg_read( uc, UC_X86_REG_EDX, &edx ); uc_reg_read( uc, UC_X86_REG_ESI, &esi ); uc_reg_read( uc, UC_X86_REG_EDI, &edi ); printf( "EIP: %08X\n", eip ); printf( "ESP: %08X\n", esp ); printf( "EBP: %08X\n", ebp ); printf( "EAX: %08X\n", eax ); printf( "EBX: %08X\n", ebx ); printf( "ECX: %08X\n", ecx ); printf( "EDX: %08X\n", edx ); printf( "ESI: %08X\n", esi ); printf( "EDI: %08X\n", edi ); } static void dump_stack( uc_engine *uc ) { uint32_t esp; uint8_t mem[4]; uc_reg_read( uc, UC_X86_REG_ESP, &esp ); printf( "stack:\n" ); for( int i = esp; i < STACK_START; i += 4 ) { uc_mem_read( uc, i, &mem, 4 ); printf( "%08X: %02X%02X%02X%02X\n", i, mem[3], mem[2], mem[1], mem[0] ); } } //~ static void initialize_memory( uc_engine *uc, uint64_t start_address, uint64_t end_address ) //~ { //~ uint8_t mem[4]; //~ memset( mem, 0, 4 ); //~ for( uint64_t a = start_address; a < end_address; a += 4 ) { //~ uc_mem_write( uc, a, &mem, 4 ); //~ } //~ } static void dump_memory( uc_engine *uc, uint64_t start_address, uint64_t end_address ) { uint8_t mem[4]; printf( "data:\n" ); for( uint64_t a = start_address; a < end_address; a += 4 ) { uc_mem_read( uc, a, &mem, 4 ); printf( "%08X: %02X%02X%02X%02X\n", (uint32_t)a, mem[0], mem[1], mem[2], mem[3] ); } } static uint32_t mul_hash( uint64_t x, int p ) { uint32_t v = x * INT32_C( 2654435761 ); return v >> ( 32 - p ); } static int compute_p( int size ) { int p = 0; while( size > 0 ) { size >>= 1; p++; } return p; } static void handle_interrupts( uc_engine *uc, uint32_t interrupt, void *user_data ) { uint32_t eax, ebx, ecx, edx; bool trace = *((bool *)user_data); if( trace ) { printf( "INT %x\n", interrupt ); dump_regs( uc ); } if( interrupt != 0x80 ) { return; } uc_reg_read( uc, UC_X86_REG_EAX, &eax ); switch( eax ) { case 4: // SYSCALL_WRITE { unsigned char buffer[256]; size_t size; uc_err uerr; uc_reg_read( uc, UC_X86_REG_EBX, &ebx ); if( ebx != 1 ) { fprintf( stderr, "ERROR: failed to call SYSCALL_WRITE on non-stdout (not implemented)\n" ); return; } uc_reg_read( uc, UC_X86_REG_ECX, &ecx ); uc_reg_read( uc, UC_X86_REG_EDX, &edx ); size = sizeof( buffer ) - 1; if( edx < sizeof( buffer ) - 1 ) { size = edx; } uerr = uc_mem_read( uc, ecx, buffer, size ); buffer[size] = '\0'; if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_mem_read( ) in int x080 syscall 4 (SYSCALL_WRITE): %s\n", uc_strerror( uerr ) ); dump_regs( uc ); exit( EXIT_FAILURE ); } printf( "%*s", size, buffer ); } break; default: fprintf( stderr, "WARN: unknown syscall %0x, EAX: %d\n", interrupt, eax ); } } int main( int argc, char *argv[] ) { uc_engine *uc; uc_err uerr; char *code; size_t code_size; size_t file_size; FILE *f; int res; csh cs; cs_err cerr; size_t nof_instrs; cs_insn *instrs; uint64_t data_start = 0; size_t data_size = 0; int opt; bool verbose = false; bool dump = false; bool trace = false; while( ( opt = getopt( argc, argv, "vdth" ) ) != -1 ) { switch( opt ) { case 'v': verbose = true; break; case 'd': dump = true; break; case 't': trace = true; break; case 'h': printf( "Usage: %s [-v] [file.bin]\n\n", argv[0] ); printf( "Options:\n" ); printf( " -h show help\n" ); printf( " -v verbose output\n" ); printf( " -d dump code and data read (at start and at the end)\n" ); printf( " -t trace and print single stepts during emulation\n" ); exit( EXIT_SUCCESS ); default: fprintf( stderr, "Usage: %s [-v] [file.bin]\n", argv[0] ); exit( EXIT_FAILURE ); } } if( optind >= argc ) { fprintf( stderr, "Expected a binary file to interpret\n" ); exit( EXIT_FAILURE ); } uerr = uc_open( UC_ARCH_X86, UC_MODE_32, &uc ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_open( ): %s\n", uc_strerror( uerr ) ); exit( EXIT_FAILURE ); } cerr = cs_open( CS_ARCH_X86, CS_MODE_32, &cs ); if( cerr != CS_ERR_OK ) { uc_close( uc ); fprintf( stderr, "ERROR: failed to call uc_open( ): %s\n", uc_strerror( uerr ) ); exit( EXIT_FAILURE ); } uerr = uc_mem_map( uc, CODE_START, CODE_SIZE, UC_PROT_ALL ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_mem_map( ) for code memory: %s\n", uc_strerror( uerr ) ); uc_close( uc ); exit( EXIT_FAILURE ); } uerr = uc_mem_map( uc, STACK_START - STACK_SIZE, STACK_SIZE, UC_PROT_ALL ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_mem_map( ) for stack memory: %s\n", uc_strerror( uerr ) ); uc_close( uc ); exit( EXIT_FAILURE ); } f = fopen( argv[optind], "r" ); if( f == NULL ) { fprintf( stderr, "ERROR: unable to read file '%s': %s\n", argv[1], strerror( errno ) ); uc_close( uc ); exit( EXIT_FAILURE ); } res = fseek( f, 0, SEEK_END ); if( res != 0 ) { fprintf( stderr, "ERROR: unable to seek to end of file '%s': %s\n", argv[1], strerror( errno ) ); uc_close( uc ); exit( EXIT_FAILURE ); } file_size = ftell( f ); if( file_size > CODE_SIZE ) { fprintf( stderr, "ERROR: we should really not run the emulator with a code segment bigger than %d\n", CODE_SIZE ); uc_close( uc ); exit( EXIT_FAILURE ); } code = (char *)malloc( file_size ); if( code == NULL ) { fprintf( stderr, "ERROR: out of memory\n" ); uc_close( uc ); exit( EXIT_FAILURE ); } fseek( f, 0, SEEK_SET ); if( res != 0 ) { fprintf( stderr, "ERROR: unable to seek to start of file '%s': %s\n", argv[1], strerror( errno ) ); uc_close( uc ); exit( EXIT_FAILURE ); } code_size = fread( code, 1, file_size, f ); if( code_size != file_size ) { fprintf( stderr, "ERROR: could not read complete code file, read only %zu bytes instead of %zu bytes\n", code_size, file_size ); uc_close( uc ); exit( EXIT_FAILURE ); } if( verbose ) { printf( "Read %zu bytes of code and static data..\n", code_size ); } fclose( f ); // disassemble the whole code block nof_instrs = cs_disasm( cs, (const uint8_t *)code, code_size, CODE_START, 0, &instrs ); if( nof_instrs == 0 ) { fprintf( stderr, "ERROR: failed to call cs_disasm( ): %s\n", cs_strerror( cs_errno( cs ) ) ); cs_close( &cs ); uc_close( uc ); exit( EXIT_FAILURE ); } if( verbose ) { printf( "Executing code..\n" ); } // print all the disassembled code for( int i = 0; i < nof_instrs; i++ ) { if( dump ) { printf( "%04X: ", (uint32_t)instrs[i].address ); for( int j = 0; j < instrs[i].size; j++ ) { printf( "%02X", instrs[i].bytes[j] ); } for( int j = ( 16 - instrs[i].size ) * 2; j > 0; j-- ) { printf( " " ); } if( data_start == 0 ) { printf( "%s %s\n", instrs[i].mnemonic, instrs[i].op_str ); } else { printf( "data\n" ); } } /* code and data segment are separated by a 'hlt' instruction, * 'hlt' must not occur anywhere else and 'hlt' must be the * last instruction in the code segment. * Then we can calculate the beginning of the data segment * (this is all needed as we have a flat binary format only) */ if( strcmp( instrs[i].mnemonic, "hlt" ) == 0 ) { data_start = instrs[i].address + instrs[i].size; //~ nof_instrs = i; break; } //~ } //~ if( instrs[i].size == 2 && instrs[i].bytes[0] == 0 && instrs[i].bytes[1] == 0 ) { //~ if( data_start == 0 ) { //~ data_start = instrs[i].address; //~ } //~ exit( 1 ); //~ printf( "data\n" ); //~ } else { //~ } } // remember address to instrs indexes so we can get the current // opcode when reaching a certain EIP address int N = nof_instrs * 2; int p = compute_p( N ); N = ( 1 << p ); int *instrs_map = calloc( N, sizeof( int ) * 2 ); for( int i = 0; i < nof_instrs; i++ ) { int n = mul_hash( instrs[i].address, p ) * 2; assert( n < 2 * N ); while( instrs_map[n+1] != 0 ) { n += 2; if( n >= 2 * N ) { n = 0; } } instrs_map[n] = i; instrs_map[n+1] = instrs[i].address; } // verify the EIP to instr index map has been constructed correctly for( int i = 0; i < nof_instrs; i++ ) { uint64_t a = instrs[i].address; int n = mul_hash( a, p ) * 2; while( instrs_map[n+1] != a ) { n += 2; if( n >= 2 * N ) { n = 0; } } n = instrs_map[n]; assert( n == i ); } // write executable code to emulator uerr = uc_mem_write( uc, CODE_START, code, code_size ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_mem_write( ): %s\n", uc_strerror( uerr ) ); uc_close( uc ); exit( EXIT_FAILURE ); } // initialize memory (to make tests deterministic) //~ if( data_start == 0 ) { //~ data_start = CODE_START + code_size; //~ } //~ initialize_memory( uc, data_start, data_start + DATA_SIZE ); // initialize stack/base pointer int addr = CODE_START; int esp = STACK_START; uc_reg_write( uc, UC_X86_REG_ESP, &esp ); uc_reg_write( uc, UC_X86_REG_EBP, &esp ); data_size = file_size - ( data_start - CODE_START ); if( dump ) { dump_memory( uc, data_start, data_start + data_size ); printf( "core start %x\n", (unsigned int)CODE_START ); printf( "data start %x\n", (unsigned int)data_start ); printf( "data size %x\n", (unsigned int)data_size ); printf( "stack start %x\n", (unsigned int)esp ); } uint64_t address = CODE_START; bool terminate = false; bool notfound = false; int iteration = 1; if( trace ) { printf( "Single step execution:\n" ); } // hook for emulating syscalls (int 0x80 on the host) uc_hook hook; uerr = uc_hook_add( uc, &hook, UC_HOOK_INTR, handle_interrupts, (void *)&trace, 1, 0 ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_hook_add( handle_interrupts ): %s\n", uc_strerror( uerr ) ); cs_close( &cs ); uc_close( uc ); exit( EXIT_FAILURE ); } while( !terminate ) { if( trace ) { printf( "-- iteration %d\n", iteration ); } iteration++; int n = mul_hash( address, p ) * 2; while( instrs_map[n+1] != address ) { n += 2; if( n >= 2 * N ) { if( !notfound ) { n = 0; notfound = true; } else { fprintf( stderr, "ERROR: address %X not found in hashmap\n", (unsigned int)address ); cs_close( &cs ); uc_close( uc ); exit( EXIT_FAILURE ); } } } n = instrs_map[n]; if( trace ) { printf( "%04X: ", (unsigned int)address ); for( int i = 0; i < instrs[n].size; i++ ) { printf( "%02X", instrs[n].bytes[i] ); } for( int i = ( 16 - instrs[n].size ) * 2; i > 0; i-- ) { printf( " " ); } printf( "%s %s\n", instrs[n].mnemonic, instrs[n].op_str ); } uerr = uc_emu_start( uc, addr, CODE_START + code_size, 0, 1 ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_emu_start( ): %s\n", uc_strerror( uerr ) ); cs_close( &cs ); uc_close( uc ); exit( EXIT_FAILURE ); } int eip; uc_reg_read( uc, UC_X86_REG_EIP, &eip ); addr = eip; address = eip; if( trace ) { dump_regs( uc ); dump_stack( uc ); dump_memory( uc, data_start, data_start + data_size ); } if( strcmp( instrs[n].mnemonic, "hlt" ) == 0 ) { terminate = true; } } if( dump ) { dump_regs( uc ); dump_stack( uc ); dump_memory( uc, data_start, data_start + data_size ); } if( verbose ) { printf( "Done, executed %d instructions.\n", iteration ); } free( instrs_map ); cs_free( instrs, nof_instrs ); cs_close( &cs ); uc_close( uc ); free( code ); exit( EXIT_SUCCESS ); }