diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-08-17 15:14:24 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-08-17 15:14:24 +0200 |
commit | 155e1b550268ff3dee93a91de4ea5a66b6ff3566 (patch) | |
tree | 4c1ed87b490641d195c278c185994ee54f6ec2cf /crenshaw | |
parent | a92040828b10dee6e699417742fd40733439c11b (diff) | |
download | compilertests-155e1b550268ff3dee93a91de4ea5a66b6ff3566.tar.gz compilertests-155e1b550268ff3dee93a91de4ea5a66b6ff3566.tar.bz2 |
crenshaw: rewrote capstone decoder in emul, working now correctly
Diffstat (limited to 'crenshaw')
-rw-r--r-- | crenshaw/README | 16 | ||||
-rw-r--r-- | crenshaw/emul.c | 102 |
2 files changed, 98 insertions, 20 deletions
diff --git a/crenshaw/README b/crenshaw/README index 0aaffcb..643d4b5 100644 --- a/crenshaw/README +++ b/crenshaw/README @@ -104,7 +104,21 @@ Vxxx variables Fxxx functions then we could decide on the LookAhead character. But that's -hardly a benefit for the people using the language. +hardly a benefit for the people using the language. Deciding +AFTER reading a symbol or in this case Ident and use the +lookahead '('. Or we now after reading what type it is (declared). The approach here is that 'x' is the variable and 'x()' is the function call. + +The generated code is not complete, we also need at least to generate +some function stubs with a 'ret'. + +Also the distinction between variable and function names would be +better suited to introduce here. + +tutor3, getchar/white space handling + +Interestingly he starts with a non-scanner, parser-only approach and +introduces lexing stuff afterwards. + diff --git a/crenshaw/emul.c b/crenshaw/emul.c index 8a0b79c..2de6aa4 100644 --- a/crenshaw/emul.c +++ b/crenshaw/emul.c @@ -7,6 +7,7 @@ #include <string.h> #include <errno.h> #include <stdbool.h> +#include <assert.h> #define PAGE_SIZE 4 * 1024 * 1024 #define CODE_START 0x1000000 @@ -39,6 +40,22 @@ static void dump_regs( uc_engine *uc ) printf( "ECX: %08x\n", ecx ); } +uint32_t mul_hash( uint64_t x, int p ) +{ + uint32_t v = x * INT32_C( 2654435761 ); + return v >> ( 32 - p ); +} + +int compute_p( int size ) +{ + int p = 0; + while( size > 0 ) { + size >>= 1; + p++; + } + return p; +} + int main( int argc, char *argv[] ) { uc_engine *uc; @@ -50,7 +67,9 @@ int main( int argc, char *argv[] ) int res; csh cs; cs_err cerr; - + size_t nof_instrs; + cs_insn *instrs; + uerr = uc_open( UC_ARCH_X86, UC_MODE_32, &uc ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_open( ): %s\n", uc_strerror( uerr ) ); @@ -93,6 +112,12 @@ int main( int argc, char *argv[] ) } file_size = ftell( f ); + if( file_size > CODE_SIZE ) { + fprintf( stderr, "ERROR: we should really not run the emulator with a code segment bigger than %d\n", CODE_SIZE ); + uc_close( uc ); + exit( EXIT_FAILURE ); + } + code = (char *)malloc( file_size ); if( code == NULL ) { fprintf( stderr, "ERROR: out of memory\n" ); @@ -109,13 +134,56 @@ int main( int argc, char *argv[] ) code_size = fread( code, 1, file_size, f ); if( code_size != file_size ) { - fprintf( stderr, "ERROR: could not read complete code file, read only %d bytes instead of %d bytes\n", + fprintf( stderr, "ERROR: could not read complete code file, read only %zu bytes instead of %zu bytes\n", code_size, file_size ); uc_close( uc ); exit( EXIT_FAILURE ); } - printf( "Read %d bytes of code..\n", code_size ); + printf( "Read %zu bytes of code..\n", code_size ); + + // disassemble the whole code block + nof_instrs = cs_disasm( cs, (const uint8_t *)code, code_size, CODE_START, 0, &instrs ); + if( nof_instrs == 0 ) { + fprintf( stderr, "ERROR: failed to call cs_disasm( ): %s\n", cs_strerror( cs_errno( cs ) ) ); + cs_close( &cs ); + uc_close( uc ); + exit( EXIT_FAILURE ); + } + // print all the disassembled code + for( int i = 0; i < nof_instrs; i++ ) { + printf( "%04X: ", (uint32_t)instrs[i].address ); + for( int j = 0; j < instrs[i].size; j++ ) { + printf( "%02X", instrs[i].bytes[j] ); + } + for( int j = ( 16 - instrs[i].size ) * 2; j > 0; j-- ) { + printf( " " ); + } + printf( "%s %s\n", instrs[i].mnemonic, instrs[i].op_str ); + } + + // remember address to instrs indexes so we can get the current + // opcode when reaching a certain EIP address + int N = nof_instrs * 2; + int p = compute_p( N ); + N = ( p << N ); + int *instrs_map = calloc( 1, N ); +AGAIN: + for( int i = 0; i < nof_instrs; i++ ) { + int n = mul_hash( instrs[i].address, p ); + assert( n < N ); + if( instrs_map[n] != 0 ) { + fprintf( stderr, "WARN: hash collision in instruction map, reallocating hash..\n" ); + assert( N < 31 ); + N <<= 1; + instrs_map = realloc( instrs_map, N ); + goto AGAIN; + } + instrs_map[n] = i; + printf( "map %08X %d %d\n", instrs[i].address, n, i ); + } + + // write executable code to emulator uerr = uc_mem_write( uc, CODE_START, code, code_size ); if( uerr != UC_ERR_OK ) { fprintf( stderr, "ERROR: failed to call uc_mem_write( ): %s\n", uc_strerror( uerr ) ); @@ -128,7 +196,7 @@ int main( int argc, char *argv[] ) uc_reg_write( uc, UC_X86_REG_ESP, &esp ); uint64_t address = CODE_START; - cs_insn *instr = cs_malloc( cs ); + //~ cs_insn *instr = cs_malloc( cs ); bool terminate = false; int iteration = 1; @@ -137,23 +205,18 @@ int main( int argc, char *argv[] ) printf( "-- iteration %d\n", iteration ); iteration++; - printf( "%04X: ", address ); - - if( !cs_disasm_iter( cs, (const uint8_t **)&code, &code_size, &address, instr ) ) { - fprintf( stderr, "ERROR: failed to call cs_disasm_iter( ): %s\n", cs_strerror( cs_errno( cs ) ) ); - cs_close( &cs ); - uc_close( uc ); - exit( EXIT_FAILURE ); - } - - for( int i = 0; i < instr->size; i++ ) { - printf( "%02X", instr->bytes[i] ); + int n = instrs_map[mul_hash( address, p )]; + + printf( "%04X (%d): ", (unsigned int)address, n ); + + for( int i = 0; i < instrs[n].size; i++ ) { + printf( "%02X", instrs[n].bytes[i] ); } - for( int i = ( 16 - instr->size ) *2; i > 0; i-- ) { + for( int i = ( 16 - instrs[n].size ) * 2; i > 0; i-- ) { printf( " " ); } - printf( " %s\t\t%s\n", instr->mnemonic, instr->op_str ); + printf( "%s %s\n", instrs[n].mnemonic, instrs[n].op_str ); uerr = uc_emu_start( uc, addr, CODE_START + CODE_SIZE, 0, 1 ); if( uerr != UC_ERR_OK ) { @@ -170,12 +233,13 @@ int main( int argc, char *argv[] ) dump_regs( uc ); - if( strcmp( instr->mnemonic, "hlt" ) == 0 ) { + if( strcmp( instrs[n].mnemonic, "hlt" ) == 0 ) { terminate = true; } } - cs_free( instr, 1 ); + free( instrs_map ); + cs_free( instrs, nof_instrs ); cs_close( &cs ); uc_close( uc ); |