summaryrefslogtreecommitdiff
path: root/crenshaw
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2018-08-17 15:14:24 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2018-08-17 15:14:24 +0200
commit155e1b550268ff3dee93a91de4ea5a66b6ff3566 (patch)
tree4c1ed87b490641d195c278c185994ee54f6ec2cf /crenshaw
parenta92040828b10dee6e699417742fd40733439c11b (diff)
downloadcompilertests-155e1b550268ff3dee93a91de4ea5a66b6ff3566.tar.gz
compilertests-155e1b550268ff3dee93a91de4ea5a66b6ff3566.tar.bz2
crenshaw: rewrote capstone decoder in emul, working now correctly
Diffstat (limited to 'crenshaw')
-rw-r--r--crenshaw/README16
-rw-r--r--crenshaw/emul.c102
2 files changed, 98 insertions, 20 deletions
diff --git a/crenshaw/README b/crenshaw/README
index 0aaffcb..643d4b5 100644
--- a/crenshaw/README
+++ b/crenshaw/README
@@ -104,7 +104,21 @@ Vxxx variables
Fxxx functions
then we could decide on the LookAhead character. But that's
-hardly a benefit for the people using the language.
+hardly a benefit for the people using the language. An alternative
+is to decide AFTER reading a symbol (in this case Ident) by using the
+lookahead '('. Or we know, after reading it, what type it is (declared).
The approach here is that 'x' is the variable and 'x()' is the function
call.
+
+The generated code is not complete; we also need to generate at least
+some function stubs with a 'ret'.
+
+Also, this would be a better place to introduce the distinction
+between variable and function names.
+
+tutor3, getchar/white space handling
+
+Interestingly he starts with a non-scanner, parser-only approach and
+introduces lexing stuff afterwards.
+
diff --git a/crenshaw/emul.c b/crenshaw/emul.c
index 8a0b79c..2de6aa4 100644
--- a/crenshaw/emul.c
+++ b/crenshaw/emul.c
@@ -7,6 +7,7 @@
#include <string.h>
#include <errno.h>
#include <stdbool.h>
+#include <assert.h>
#define PAGE_SIZE 4 * 1024 * 1024
#define CODE_START 0x1000000
@@ -39,6 +40,22 @@ static void dump_regs( uc_engine *uc )
printf( "ECX: %08x\n", ecx );
}
+/*
+ * Multiplicative hash: multiplies x by the constant 2654435761,
+ * truncates the product to 32 bits and returns the top p bits of it,
+ * i.e. a value in the range [0, 2^p).
+ *
+ * x: key to hash (the callers pass an instruction address)
+ * p: number of result bits; p <= 0 yields 0 and p > 32 is treated
+ *    as 32, so the shift count below always stays within [0, 31]
+ */
+uint32_t mul_hash( uint64_t x, int p )
+{
+    if( p <= 0 ) {
+        return 0;
+    }
+    if( p > 32 ) {
+        p = 32;
+    }
+    // the constant does not fit into an int32_t, so UINT32_C is the
+    // matching macro; the cast makes the truncation to 32 bits explicit
+    uint32_t v = (uint32_t)( x * UINT32_C( 2654435761 ) );
+    return v >> ( 32 - p );
+}
+
+/*
+ * Counts how many times size has to be shifted right by one bit until
+ * it is no longer positive, i.e. the bit length of size.
+ * Returns 0 for size <= 0.
+ */
+int compute_p( int size )
+{
+    int bits;
+
+    for( bits = 0; size > 0; size >>= 1 ) {
+        bits++;
+    }
+    return bits;
+}
+
int main( int argc, char *argv[] )
{
uc_engine *uc;
@@ -50,7 +67,9 @@ int main( int argc, char *argv[] )
int res;
csh cs;
cs_err cerr;
-
+ size_t nof_instrs;
+ cs_insn *instrs;
+
uerr = uc_open( UC_ARCH_X86, UC_MODE_32, &uc );
if( uerr != UC_ERR_OK ) {
fprintf( stderr, "ERROR: failed to call uc_open( ): %s\n", uc_strerror( uerr ) );
@@ -93,6 +112,12 @@ int main( int argc, char *argv[] )
}
file_size = ftell( f );
+ if( file_size > CODE_SIZE ) {
+ fprintf( stderr, "ERROR: we should really not run the emulator with a code segment bigger than %d\n", CODE_SIZE );
+ uc_close( uc );
+ exit( EXIT_FAILURE );
+ }
+
code = (char *)malloc( file_size );
if( code == NULL ) {
fprintf( stderr, "ERROR: out of memory\n" );
@@ -109,13 +134,56 @@ int main( int argc, char *argv[] )
code_size = fread( code, 1, file_size, f );
if( code_size != file_size ) {
- fprintf( stderr, "ERROR: could not read complete code file, read only %d bytes instead of %d bytes\n",
+ fprintf( stderr, "ERROR: could not read complete code file, read only %zu bytes instead of %zu bytes\n",
code_size, file_size );
uc_close( uc );
exit( EXIT_FAILURE );
}
- printf( "Read %d bytes of code..\n", code_size );
+ printf( "Read %zu bytes of code..\n", code_size );
+
+ // disassemble the whole code block
+ nof_instrs = cs_disasm( cs, (const uint8_t *)code, code_size, CODE_START, 0, &instrs );
+ if( nof_instrs == 0 ) {
+ fprintf( stderr, "ERROR: failed to call cs_disasm( ): %s\n", cs_strerror( cs_errno( cs ) ) );
+ cs_close( &cs );
+ uc_close( uc );
+ exit( EXIT_FAILURE );
+ }
+ // print all the disassembled code
+ for( int i = 0; i < nof_instrs; i++ ) {
+ printf( "%04X: ", (uint32_t)instrs[i].address );
+ for( int j = 0; j < instrs[i].size; j++ ) {
+ printf( "%02X", instrs[i].bytes[j] );
+ }
+ for( int j = ( 16 - instrs[i].size ) * 2; j > 0; j-- ) {
+ printf( " " );
+ }
+ printf( "%s %s\n", instrs[i].mnemonic, instrs[i].op_str );
+ }
+
+ // remember address to instrs indexes so we can get the current
+ // opcode when reaching a certain EIP address
+ int N = nof_instrs * 2;
+ int p = compute_p( N );
+ N = ( p << N );
+ int *instrs_map = calloc( 1, N );
+AGAIN:
+ for( int i = 0; i < nof_instrs; i++ ) {
+ int n = mul_hash( instrs[i].address, p );
+ assert( n < N );
+ if( instrs_map[n] != 0 ) {
+ fprintf( stderr, "WARN: hash collision in instruction map, reallocating hash..\n" );
+ assert( N < 31 );
+ N <<= 1;
+ instrs_map = realloc( instrs_map, N );
+ goto AGAIN;
+ }
+ instrs_map[n] = i;
+ printf( "map %08X %d %d\n", instrs[i].address, n, i );
+ }
+
+ // write executable code to emulator
uerr = uc_mem_write( uc, CODE_START, code, code_size );
if( uerr != UC_ERR_OK ) {
fprintf( stderr, "ERROR: failed to call uc_mem_write( ): %s\n", uc_strerror( uerr ) );
@@ -128,7 +196,7 @@ int main( int argc, char *argv[] )
uc_reg_write( uc, UC_X86_REG_ESP, &esp );
uint64_t address = CODE_START;
- cs_insn *instr = cs_malloc( cs );
+ //~ cs_insn *instr = cs_malloc( cs );
bool terminate = false;
int iteration = 1;
@@ -137,23 +205,18 @@ int main( int argc, char *argv[] )
printf( "-- iteration %d\n", iteration );
iteration++;
- printf( "%04X: ", address );
-
- if( !cs_disasm_iter( cs, (const uint8_t **)&code, &code_size, &address, instr ) ) {
- fprintf( stderr, "ERROR: failed to call cs_disasm_iter( ): %s\n", cs_strerror( cs_errno( cs ) ) );
- cs_close( &cs );
- uc_close( uc );
- exit( EXIT_FAILURE );
- }
-
- for( int i = 0; i < instr->size; i++ ) {
- printf( "%02X", instr->bytes[i] );
+ int n = instrs_map[mul_hash( address, p )];
+
+ printf( "%04X (%d): ", (unsigned int)address, n );
+
+ for( int i = 0; i < instrs[n].size; i++ ) {
+ printf( "%02X", instrs[n].bytes[i] );
}
- for( int i = ( 16 - instr->size ) *2; i > 0; i-- ) {
+ for( int i = ( 16 - instrs[n].size ) * 2; i > 0; i-- ) {
printf( " " );
}
- printf( " %s\t\t%s\n", instr->mnemonic, instr->op_str );
+ printf( "%s %s\n", instrs[n].mnemonic, instrs[n].op_str );
uerr = uc_emu_start( uc, addr, CODE_START + CODE_SIZE, 0, 1 );
if( uerr != UC_ERR_OK ) {
@@ -170,12 +233,13 @@ int main( int argc, char *argv[] )
dump_regs( uc );
- if( strcmp( instr->mnemonic, "hlt" ) == 0 ) {
+ if( strcmp( instrs[n].mnemonic, "hlt" ) == 0 ) {
terminate = true;
}
}
- cs_free( instr, 1 );
+ free( instrs_map );
+ cs_free( instrs, nof_instrs );
cs_close( &cs );
uc_close( uc );