summaryrefslogtreecommitdiff
path: root/crenshaw
diff options
context:
space:
mode:
author: Andreas Baumann <mail@andreasbaumann.cc> 2018-08-17 22:22:12 +0200
committer: Andreas Baumann <mail@andreasbaumann.cc> 2018-08-17 22:22:12 +0200
commit: 47e6a680daeb0c789162b7f9528960cf5bf07adb (patch)
tree: 894d61d2ce363e7ee2359c1ceaafe86618a1cef5 /crenshaw
parent: 155e1b550268ff3dee93a91de4ea5a66b6ff3566 (diff)
downloadcompilertests-47e6a680daeb0c789162b7f9528960cf5bf07adb.tar.gz
compilertests-47e6a680daeb0c789162b7f9528960cf5bf07adb.tar.bz2
crenshaw: added two assignments (just for testing)
extended emul to properly handle hash collisions in the instruction map
Diffstat (limited to 'crenshaw')
-rw-r--r-- crenshaw/README 8
-rw-r--r-- crenshaw/emul.c 52
-rw-r--r-- crenshaw/main.pas 24
-rw-r--r-- crenshaw/test.prog 3
4 files changed, 65 insertions, 22 deletions
diff --git a/crenshaw/README b/crenshaw/README
index 643d4b5..74f0a20 100644
--- a/crenshaw/README
+++ b/crenshaw/README
@@ -26,6 +26,12 @@ TODO: keystone to actually assemble the code, currently
we use nasm to produce a binary and we use capstone to decode
it. What's better?
+TODO: architecture-independent 'hlt' instruction to stop
+emulation at the right point?
+
+TODO: we would like to set flavours of the CPU, a real i486
+instruction set or enable/disable certain features like SSE2.
+
links
-----
@@ -122,3 +128,5 @@ tutor3, getchar/white space handling
Interestingly he starts with a non-scanner, parser-only approach and
introduces lexing stuff afterwards.
+Checking for LF feels hacky.
+
diff --git a/crenshaw/emul.c b/crenshaw/emul.c
index 2de6aa4..104cac6 100644
--- a/crenshaw/emul.c
+++ b/crenshaw/emul.c
@@ -166,23 +166,35 @@ int main( int argc, char *argv[] )
// opcode when reaching a certain EIP address
int N = nof_instrs * 2;
int p = compute_p( N );
- N = ( p << N );
- int *instrs_map = calloc( 1, N );
-AGAIN:
+ N = ( 1 << p );
+ int *instrs_map = calloc( N, sizeof( int ) * 2 );
for( int i = 0; i < nof_instrs; i++ ) {
- int n = mul_hash( instrs[i].address, p );
- assert( n < N );
- if( instrs_map[n] != 0 ) {
- fprintf( stderr, "WARN: hash collision in instruction map, reallocating hash..\n" );
- assert( N < 31 );
- N <<= 1;
- instrs_map = realloc( instrs_map, N );
- goto AGAIN;
+ int n = mul_hash( instrs[i].address, p ) * 2;
+ assert( n < 2 * N );
+ while( instrs_map[n+1] != 0 ) {
+ n += 2;
+ if( n >= 2 * N ) {
+ n = 0;
+ }
}
instrs_map[n] = i;
- printf( "map %08X %d %d\n", instrs[i].address, n, i );
+ instrs_map[n+1] = instrs[i].address;
}
-
+
+ // verify the EIP to instr index map has been constructed correctly
+ for( int i = 0; i < nof_instrs; i++ ) {
+ uint64_t a = instrs[i].address;
+ int n = mul_hash( a, p ) * 2;
+ while( instrs_map[n+1] != a ) {
+ n += 2;
+ if( n >= 2 * N ) {
+ n = 0;
+ }
+ }
+ n = instrs_map[n];
+ assert( n == i );
+ }
+
// write executable code to emulator
uerr = uc_mem_write( uc, CODE_START, code, code_size );
if( uerr != UC_ERR_OK ) {
@@ -199,15 +211,23 @@ AGAIN:
//~ cs_insn *instr = cs_malloc( cs );
bool terminate = false;
int iteration = 1;
-
+
+ printf( "Single step execution:\n" );
while( !terminate ) {
printf( "-- iteration %d\n", iteration );
iteration++;
- int n = instrs_map[mul_hash( address, p )];
+ int n = mul_hash( address, p ) * 2;
+ while( instrs_map[n+1] != address ) {
+ n += 2;
+ if( n >= 2 * N ) {
+ n = 0;
+ }
+ }
+ n = instrs_map[n];
- printf( "%04X (%d): ", (unsigned int)address, n );
+ printf( "%04X: ", (unsigned int)address );
for( int i = 0; i < instrs[n].size; i++ ) {
printf( "%02X", instrs[n].bytes[i] );
diff --git a/crenshaw/main.pas b/crenshaw/main.pas
index 339484c..0694ad4 100644
--- a/crenshaw/main.pas
+++ b/crenshaw/main.pas
@@ -2,7 +2,7 @@ program Main;
const
TAB = ^I;
- CR = ^M;
+ LF = ^J;
var Look : char;
@@ -101,7 +101,7 @@ begin
if symbols[i].name = name then
found := true;
end;
- if i = 26 then Abort('Table of variable symbols overflowed');
+ if i = 26 then Abort('Table of symbols overflowed');
if not found then begin
symbols[nof_symbols].name := name;
symbols[nof_symbols].sym_type := sym_type;
@@ -117,10 +117,10 @@ begin
Match('(');
Match(')');
RememberName(name, functionType);
- EmitLn('call ' + name);
+ EmitLn('call '+name);
end else begin
RememberName(name, variableType);
- EmitLn('mov eax, [' + name + ']');
+ EmitLn('mov eax,['+name+']');
end;
end;
@@ -201,6 +201,17 @@ begin
end;
end;
+procedure Assignment; { parse "<name>=<expression>" and emit code that stores the result in <name> }
+var name : char; { target of the assignment; declared as a single character }
+begin
+ name := GetName; { GetName is defined elsewhere; presumably reads the identifier from the input -- confirm }
+ RememberName(name, variableType); { register the target with variableType }
+ Match('='); { consume the '=' between target and expression }
+ Expression; { code emitted here presumably leaves the value in eax -- confirm against Expression }
+ EmitLn('lea ebx,['+name+']'); { emitted code: ebx := address of the variable }
+ EmitLn('mov [ebx],eax'); { emitted code: store eax at that address }
+end;
+
procedure Init;
begin
nof_symbols := 0;
@@ -256,6 +267,9 @@ end;
begin
Prologue;
Init;
- Expression;
+ Assignment;
+ Match(LF);
+ Assignment;
+ if Look <> LF then Expected('Newline');
Epilogue;
end.
diff --git a/crenshaw/test.prog b/crenshaw/test.prog
index 880418a..2be58d7 100644
--- a/crenshaw/test.prog
+++ b/crenshaw/test.prog
@@ -1 +1,2 @@
-(4+4)*3/2-3+a-b*f()
+x=(4+4)*3/2-3+a-b*f()
+y=x*2