diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-08-17 22:22:12 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-08-17 22:22:12 +0200 |
commit | 47e6a680daeb0c789162b7f9528960cf5bf07adb (patch) | |
tree | 894d61d2ce363e7ee2359c1ceaafe86618a1cef5 /crenshaw | |
parent | 155e1b550268ff3dee93a91de4ea5a66b6ff3566 (diff) | |
download | compilertests-47e6a680daeb0c789162b7f9528960cf5bf07adb.tar.gz compilertests-47e6a680daeb0c789162b7f9528960cf5bf07adb.tar.bz2 |
crenshaw: added two assignments (just for testing)
extended emul to properly handle hash collisions in the instruction map
Diffstat (limited to 'crenshaw')
-rw-r--r-- | crenshaw/README | 8 | ||||
-rw-r--r-- | crenshaw/emul.c | 52 | ||||
-rw-r--r-- | crenshaw/main.pas | 24 | ||||
-rw-r--r-- | crenshaw/test.prog | 3 |
4 files changed, 65 insertions, 22 deletions
diff --git a/crenshaw/README b/crenshaw/README index 643d4b5..74f0a20 100644 --- a/crenshaw/README +++ b/crenshaw/README @@ -26,6 +26,12 @@ TODO: kapstone to actually assemble the code, currently we use nasm to produce a binary and we use capstone to decode it. What's better? +TODO: architecture-independend 'hlt' instruction to stop +emulation at the right point? + +TODO: we would like to set flavours of the CPU, a real i486 +instruction set or enable/disable certain features like SSE2. + links ----- @@ -122,3 +128,5 @@ tutor3, getchar/white space handling Interestingly he starts with a non-scanner, parser-only approach and introduces lexing stuff afterwards. +Checking for LF feels hacky. + diff --git a/crenshaw/emul.c b/crenshaw/emul.c index 2de6aa4..104cac6 100644 --- a/crenshaw/emul.c +++ b/crenshaw/emul.c @@ -166,23 +166,35 @@ int main( int argc, char *argv[] ) // opcode when reaching a certain EIP address int N = nof_instrs * 2; int p = compute_p( N ); - N = ( p << N ); - int *instrs_map = calloc( 1, N ); -AGAIN: + N = ( 1 << p ); + int *instrs_map = calloc( N, sizeof( int ) * 2 ); for( int i = 0; i < nof_instrs; i++ ) { - int n = mul_hash( instrs[i].address, p ); - assert( n < N ); - if( instrs_map[n] != 0 ) { - fprintf( stderr, "WARN: hash collision in instruction map, reallocating hash..\n" ); - assert( N < 31 ); - N <<= 1; - instrs_map = realloc( instrs_map, N ); - goto AGAIN; + int n = mul_hash( instrs[i].address, p ) * 2; + assert( n < 2 * N ); + while( instrs_map[n+1] != 0 ) { + n += 2; + if( n >= 2 * N ) { + n = 0; + } } instrs_map[n] = i; - printf( "map %08X %d %d\n", instrs[i].address, n, i ); + instrs_map[n+1] = instrs[i].address; } - + + // verify the EIP to instr index map has been constructed correctly + for( int i = 0; i < nof_instrs; i++ ) { + uint64_t a = instrs[i].address; + int n = mul_hash( a, p ) * 2; + while( instrs_map[n+1] != a ) { + n += 2; + if( n >= 2 * N ) { + n = 0; + } + } + n = instrs_map[n]; + assert( n == i ); + } + // write executable code to emulator uerr = uc_mem_write( uc, CODE_START, code, code_size ); if( uerr != UC_ERR_OK ) { @@ -199,15 +211,23 @@ AGAIN: //~ cs_insn *instr = cs_malloc( cs ); bool terminate = false; int iteration = 1; - + + printf( "Single step execution:\n" ); while( !terminate ) { printf( "-- iteration %d\n", iteration ); iteration++; - int n = instrs_map[mul_hash( address, p )]; + int n = mul_hash( address, p ) * 2; + while( instrs_map[n+1] != address ) { + n += 2; + if( n >= 2 * N ) { + n = 0; + } + } + n = instrs_map[n]; - printf( "%04X (%d): ", (unsigned int)address, n ); + printf( "%04X: ", (unsigned int)address ); for( int i = 0; i < instrs[n].size; i++ ) { printf( "%02X", instrs[n].bytes[i] ); diff --git a/crenshaw/main.pas b/crenshaw/main.pas index 339484c..0694ad4 100644 --- a/crenshaw/main.pas +++ b/crenshaw/main.pas @@ -2,7 +2,7 @@ program Main; const TAB = ^I; - CR = ^M; + LF = ^J; var Look : char; @@ -101,7 +101,7 @@ begin if symbols[i].name = name then found := true; end; - if i = 26 then Abort('Table of variable symbols overflowed'); + if i = 26 then Abort('Table of symbols overflowed'); if not found then begin symbols[nof_symbols].name := name; symbols[nof_symbols].sym_type := sym_type; @@ -117,10 +117,10 @@ begin Match('('); Match(')'); RememberName(name, functionType); - EmitLn('call ' + name); + EmitLn('call '+name); end else begin RememberName(name, variableType); - EmitLn('mov eax, [' + name + ']'); + EmitLn('mov eax,['+name+']'); end; end; @@ -201,6 +201,17 @@ begin end; end; +procedure Assignment; +var name : char; +begin + name := GetName; + RememberName(name, variableType); + Match('='); + Expression; + EmitLn('lea ebx,['+name+']'); + EmitLn('mov [ebx],eax'); +end; + procedure Init; begin nof_symbols := 0; @@ -256,6 +267,9 @@ end; begin Prologue; Init; - Expression; + Assignment; + Match(LF); + Assignment; + if Look <> LF then Expected('Newline'); Epilogue; end. diff --git a/crenshaw/test.prog b/crenshaw/test.prog index 880418a..2be58d7 100644 --- a/crenshaw/test.prog +++ b/crenshaw/test.prog @@ -1 +1,2 @@ -(4+4)*3/2-3+a-b*f() +x=(4+4)*3/2-3+a-b*f() +y=x*2 |