summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ecomp-c/README26
-rw-r--r--ecomp-c/asm-i386.c425
-rw-r--r--ecomp-c/ec.c8
-rw-r--r--ecomp-c/libc-freestanding.c12
-rw-r--r--ecomp-c/test1.e2
-rw-r--r--ecomp-c/tests/const_assignment_error.eout2
-rw-r--r--ecomp-c/tests/empty_module.eout2
-rw-r--r--ecomp-c/tests/unknown_type.eout2
-rw-r--r--ecomp-c/tests/unknown_variable.eout2
-rw-r--r--ecomp-c/tests/variable_assign_from_constant.eout2
-rw-r--r--ecomp-c/tests/variable_assign_from_expression.eout2
-rw-r--r--ecomp-c/tests/variable_assign_from_type.eout2
-rw-r--r--ecomp-c/tests/variable_assign_from_variable.eout2
-rw-r--r--ecomp-c/tests/variable_name_as_type.eout2
-rw-r--r--ecomp-c/tests/variable_not_initialized.eout2
15 files changed, 472 insertions, 21 deletions
diff --git a/ecomp-c/README b/ecomp-c/README
index 0bc9bfe..4f15aaf 100644
--- a/ecomp-c/README
+++ b/ecomp-c/README
@@ -32,18 +32,27 @@ cat libc-freestanding.c ec.c _start-stub.c | tcc -g -m32 -march=i386 -nostdlib -
# to use libc and syscall of the host
cat libc-hosted.c ec.c | tcc -g -m32 -march=i386 -std=c89 -Werror -Wall -o ec -lbsd -
-# assemble and test in emulator
+assembler
+---------
+
+cat libc-freestanding.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
+cat libc-hosted.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -fno-stack-protector -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
+
+usage
+-----
+
+# compile to assembly
./ec < test1.e > test1.asm
+# use the host assembler to produce a binary
fasm test1.asm test1.bin
+# use our own minimalistic assembler
./asm-i386 < test1.asm > test1.bin
gcc -g -Wall -std=c99 -o emul emul.c -lunicorn -lcapstone -pthread
./emul test1.bin
-# testbed
+# run test framework
tests/run_tests.sh
-cat libc-freestanding.c asm-i386.c | gcc -g -O0 -m32 -march=i386 -ffreestanding -fno-stack-protector -nostdlib -emain -Werror -Wno-noreturn -Wall -pedantic -fno-pic -std=c89 -o asm-i386 -x c -
-
links
-----
@@ -128,3 +137,12 @@ detection of uninitialized variables
this might be very hard and heuristic (and then depend on the compiler
optimization level). Simple flows can be statically analyzed, what
to do when conditions, loops and complex data structures come into place?
+
+assembler
+---------
+
+http://ref.x86asm.net/coder32.html
+"Art Of Intel x86 Assembly"
+Intel® 64 and IA-32 Architectures Software Developer’s Manual
+https://www.felixcloutier.com/x86/index.html
+
diff --git a/ecomp-c/asm-i386.c b/ecomp-c/asm-i386.c
index 79bad22..105e3a8 100644
--- a/ecomp-c/asm-i386.c
+++ b/ecomp-c/asm-i386.c
@@ -1,10 +1,433 @@
/*
* minimalistic assembler for IA-32, i386
+ *
+ * This assembler is a one pass assembler as we do not want to have
+ * to implement re-seekable files (neither input nor output).
+ *
+ * We only implement the absolute minimal opcodes and addressing modes.
+ *
+ * B8+r XX XX XX XX mov r32, imm32
+ * 50+r push r32
+ * 58+r pop r32
+ * A3 XX XX XX XX mov moffs32, eax
+ * A1 XX XX XX XX mov eax, moffs32
+ * F7 E3 mul ebx
+ * F7 F3 div ebx
+ * F4 hlt
+ *
+ * r32
+ * eax 000
+ * ecx 001
+ * edx 010
+ * ebx 011
+ *
+ * imm32
+ * little endian 32-bit constant
+ *
+ * moffs32
+ * offset to DS or SS
+ *
+ * format binary
+ * use32
+ * org 0x0000
+ * dd 0x0000
*/
+/* constants */
+
+enum {
+ MAX_IDENT_LEN = 64,
+ MAX_NUMBER_LEN = 10,
+ MAX_HEXNUMBER_LEN = 8
+};
+
+static int DEBUG_GETCHAR = 1;
+static int DEBUG_SCANNER = 1;
+
+/* scanner */
+
+typedef enum {
+ S_format,
+ S_binary,
+ S_use32,
+ S_org,
+ S_ident,
+ S_number,
+ S_eof
+} S_Symbol;
+
+static char *symname[S_eof+1] = {
+ "format",
+ "binary",
+ "use32",
+ "org",
+ "ident",
+ "number",
+ "eof"
+};
+
+static int col;
+static int row;
+static int look;
+
+static S_Symbol sym;
+
+static char ident[MAX_IDENT_LEN+1];
+static int num;
+
+static void Err( char *s, va_list args )
+{
+ fprintf( stderr, "Error line %d, pos %d: ", row, col );
+ vfprintf( stderr, s, args );
+ fputs( "\n", stderr );
+ fflush( stderr );
+}
+
+static void Halt( int code )
+{
+ exit( code );
+}
+
+static void Abort( char *s, ... )
+{
+ va_list args;
+ va_start( args, s );
+ Err( s, args );
+ va_end( args );
+ Halt( EXIT_FAILURE );
+}
+
+/*
+static void *Allocate( unsigned int size )
+{
+ char *p;
+
+ p = malloc( size );
+ if( p == NULL ) {
+ Abort( "Out of memory" );
+ }
+
+ return p;
+}
+*/
+
+static int getChar( void )
+{
+ int c;
+
+ c = getchar( );
+ if( DEBUG_GETCHAR ) {
+ if( c == '\n' ) {
+ fprintf( stderr, "getchar -> '\\n'\n" );
+ } else if( c == EOF ) {
+ fprintf( stderr, "getchar -> 'EOF'\n" );
+ } else {
+ fprintf( stderr, "getchar -> '%c'\n", c );
+ }
+ }
+ if( c == EOF ) {
+ return c;
+ }
+
+ col++;
+ if( c == '\n' ) {
+ col = 1;
+ row++;
+ }
+
+ return c;
+}
+
+static int isWhite( int c )
+{
+ if( c == ' ' || c == '\r' || c == '\n' || c == '\t' ) return 1;
+ return 0;
+}
+
+static int isAlpha( int c )
+{
+ if( ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ) ) return 1;
+ return 0;
+}
+
+static int isDigit( int c )
+{
+ if( ( c >= '0' && c <= '9' ) ) return 1;
+ return 0;
+}
+
+static int isHexDigit( int c )
+{
+ if( ( c >= '0' && c <= '9' ) || ( c >= 'a' && c <= 'f' ) || ( c >= 'A' && c <= 'F' ) ) return 1;
+ return 0;
+}
+
+static int isSpecial( int c )
+{
+ if( c == '_' ) return 1;
+ return 0;
+}
+
+static void skipWhite( void )
+{
+ while( isWhite( look ) ) {
+ look = getChar( );
+ }
+}
+
+static void number( void )
+{
+ int n = 0;
+
+ if( isDigit( look ) ) {
+ num = look - '0';
+ look = getChar( );
+ while( isDigit( look ) && n < MAX_NUMBER_LEN ) {
+ n++;
+ num = 10 * num + ( look - '0' );
+ look = getChar( );
+ }
+ if( n == MAX_NUMBER_LEN ) {
+ Abort( "Number exceeds maximal length" );
+ }
+ sym = S_number;
+ }
+}
+
+/*
+ * Scan a hexadecimal literal of the form '$' hexdigit { hexdigit }.
+ *
+ * On entry 'look' holds the '$' prefix, which is consumed here. The value
+ * of the literal is stored in 'num' and 'sym' is set to S_number.
+ *
+ * Aborts when no hexadecimal digit follows the '$' or when the literal has
+ * more than MAX_HEXNUMBER_LEN digits.
+ */
+static void hexnumber( void )
+{
+    /* accumulate unsigned: literals with the top bit set (e.g. $FFFFFFFF)
+     * must not overflow a signed int while they are being built
+     */
+    unsigned int value = 0;
+    int digit;
+    int n = 0;
+
+    look = getChar( ); /* skip the '$' prefix */
+    if( !isHexDigit( look ) ) {
+        Abort( "Expected hexadecimal digit after '$'" );
+    }
+    while( isHexDigit( look ) ) {
+        if( n == MAX_HEXNUMBER_LEN ) {
+            Abort( "Hexadecimal number exceeds maximal length" );
+        }
+        n++;
+        if( isDigit( look ) ) {
+            digit = look - '0';
+        } else if( look >= 'a' && look <= 'f' ) {
+            digit = look - 'a' + 10; /* 'a' has the value 10, not 0 */
+        } else {
+            digit = look - 'A' + 10;
+        }
+        value = 16 * value + (unsigned int)digit;
+        look = getChar( );
+    }
+    num = (int)value;
+    sym = S_number;
+}
+
+static void identifier( void )
+{
+ int n = 0;
+
+ if( isAlpha( look ) ) {
+ ident[n] = look;
+ n++;
+ look = getChar( );
+ while( ( isAlpha( look ) || isDigit( look ) || isSpecial( look ) ) && n < MAX_IDENT_LEN ) {
+ ident[n] = look;
+ n++;
+ look = getChar( );
+ }
+ ident[n] = '\0';
+ if( n == MAX_IDENT_LEN ) {
+ Abort( "Identifier exceeds maximal length" );
+ }
+ sym = S_ident;
+ }
+}
+
+static S_Symbol getSym( void )
+{
+ int s = S_eof;
+
+ skipWhite( );
+
+ switch( look ) {
+ case '$':
+ hexnumber( );
+ s = S_number;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ number( );
+ s = S_number;
+ break;
+ case 'f':
+ identifier( );
+ if( strcmp( ident, "format" ) == 0 ) {
+ s = S_format;
+ } else {
+ s = S_ident;
+ }
+ break;
+ case 'b':
+ identifier( );
+ if( strcmp( ident, "binary" ) == 0 ) {
+ s = S_binary;
+ } else {
+ s = S_ident;
+ }
+ break;
+ case 'o':
+ identifier( );
+ if( strcmp( ident, "org" ) == 0 ) {
+ s = S_org;
+ } else {
+ s = S_ident;
+ }
+ break;
+ case 'u':
+ identifier( );
+ if( strcmp( ident, "use32" ) == 0 ) {
+ s = S_use32;
+ } else {
+ s = S_ident;
+ }
+ break;
+ case 'a':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ identifier( );
+ s = S_ident;
+ break;
+ case EOF:
+ s = S_eof;
+ break;
+ default:
+ Abort( "Illegal character '%c'", (char)look );
+ }
+
+ if( DEBUG_SCANNER ) {
+ switch( s ) {
+ case S_ident:
+ fprintf( stderr, "sym -> %s( '%s' )\n", symname[s], ident );
+ break;
+
+ default:
+ fprintf( stderr, "sym -> %s\n", symname[s] );
+ }
+ }
+
+ return s;
+}
+
+/* parser */
+
+static void Expect( S_Symbol expect )
+{
+ if( sym == expect ) {
+ sym = getSym( );
+ } else {
+ Abort( "Expected symbol '%s'", symname[expect] );
+ }
+}
+
+static void parseHeader( void )
+{
+ Expect( S_format );
+ Expect( S_binary ); /* the only format we support for now */
+ Expect( S_use32 ); /* for now the only width we support */
+}
+
+static void init( void )
+{
+ col = 1;
+ row = 1;
+ look = getChar( );
+
+ sym = getSym( );
+}
+
+static void prologue( void )
+{
+ /* raw hex, entry point is fixed at ORG,
+ * data follows code
+ */
+}
+
+static void epilogue( void )
+{
+}
+
+static void deinit( void )
+{
+}
+
int main( void )
{
- exit( EXIT_SUCCESS );
+ init( );
+ prologue( );
+ parseHeader( );
+ while( sym != S_eof ) {
+ sym = getSym( );
+ }
+ if( sym != S_eof ) {
+ Abort( "Unexpected EOF" );
+ }
+ epilogue( );
+ deinit( );
+
+ malloc_stats( );
+
+ Halt( EXIT_SUCCESS );
return 0;
}
diff --git a/ecomp-c/ec.c b/ecomp-c/ec.c
index c26b2ff..62fd19f 100644
--- a/ecomp-c/ec.c
+++ b/ecomp-c/ec.c
@@ -10,7 +10,7 @@
enum {
MAX_IDENT_LEN = 64,
- MAX_NUMBER_LEN = 9,
+ MAX_NUMBER_LEN = 10,
MAX_NUMBER_OF_ENUMERATIONS = 6
};
@@ -179,7 +179,6 @@ static void number( void )
if( n == MAX_NUMBER_LEN ) {
Abort( "Number exceeds maximal length" );
}
- sym = S_number;
}
}
@@ -200,7 +199,6 @@ static void identifier( void )
if( n == MAX_IDENT_LEN ) {
Abort( "Identifier exceeds maximal length" );
}
- sym = S_ident;
}
}
@@ -243,7 +241,7 @@ static S_Symbol getSym( void )
case '8':
case '9':
number( );
- s = S_number;
+ s = S_number;
break;
case 'b':
identifier( );
@@ -947,7 +945,7 @@ static void prologue( void )
/* fasm */
Emit( "format binary\n" );
Emit( "use32\n" );
- Emit( "org 0x1000000\n" );
+ Emit( "org $1000000\n" );
}
static void epilogue( void )
diff --git a/ecomp-c/libc-freestanding.c b/ecomp-c/libc-freestanding.c
index 1a53465..4d857da 100644
--- a/ecomp-c/libc-freestanding.c
+++ b/ecomp-c/libc-freestanding.c
@@ -366,6 +366,18 @@ enum {
EOF = -1
};
+/*
+ * Write the character c to stream.
+ *
+ * The character is wrapped into a one-character string and handed to
+ * print_string together with the stream's file number.
+ *
+ * Returns the character written, converted to unsigned char and then to
+ * int, as ISO C requires for fputc.
+ *
+ * NOTE(review): print_string takes a C string, so a NUL character
+ * presumably cannot be emitted through this path - confirm.
+ */
+int fputc( int c, FILE *stream )
+{
+    char s[2];
+
+    s[0] = (char)c;
+    s[1] = '\0';
+
+    print_string( stream->fileno, s );
+
+    return (unsigned char)c;
+}
+
int fputs( const char *s, FILE *stream )
{
print_string( stream->fileno, s );
diff --git a/ecomp-c/test1.e b/ecomp-c/test1.e
index d276f9c..35ae6de 100644
--- a/ecomp-c/test1.e
+++ b/ecomp-c/test1.e
@@ -25,5 +25,5 @@ begin
a := a + 1; // a should be 8 now
d := a * c + b; // d should be 8 * 20 + 7 = 167 (A7 hex)
d := a * ( c + b ); // d should be 8 * ( 20 + 7 ) = 216 (D8 hex)
- e := ( ( 7 * a + b ) + 2 * ( b + a + 3 ) ) * 2; // ((7*8+7)+2*(7+8+3))*2=198 (C6 hex)
+ e := ( ( 7 * a + b ) + 2 * ( b + a + 3 ) ) * 4 / 2; // ((7*8+7)+2*(7+8+3))*4/2=198 (C6 hex)
end
diff --git a/ecomp-c/tests/const_assignment_error.eout b/ecomp-c/tests/const_assignment_error.eout
index 43b17ba..5f36409 100644
--- a/ecomp-c/tests/const_assignment_error.eout
+++ b/ecomp-c/tests/const_assignment_error.eout
@@ -1,4 +1,4 @@
format binary
use32
-org 0x1000000
+org $1000000
; CONST N -> integer, 20
diff --git a/ecomp-c/tests/empty_module.eout b/ecomp-c/tests/empty_module.eout
index 1f886b1..ef4e524 100644
--- a/ecomp-c/tests/empty_module.eout
+++ b/ecomp-c/tests/empty_module.eout
@@ -1,4 +1,4 @@
format binary
use32
-org 0x1000000
+org $1000000
hlt
diff --git a/ecomp-c/tests/unknown_type.eout b/ecomp-c/tests/unknown_type.eout
index bcdeec9..0883ccb 100644
--- a/ecomp-c/tests/unknown_type.eout
+++ b/ecomp-c/tests/unknown_type.eout
@@ -1,3 +1,3 @@
format binary
use32
-org 0x1000000
+org $1000000
diff --git a/ecomp-c/tests/unknown_variable.eout b/ecomp-c/tests/unknown_variable.eout
index bcdeec9..0883ccb 100644
--- a/ecomp-c/tests/unknown_variable.eout
+++ b/ecomp-c/tests/unknown_variable.eout
@@ -1,3 +1,3 @@
format binary
use32
-org 0x1000000
+org $1000000
diff --git a/ecomp-c/tests/variable_assign_from_constant.eout b/ecomp-c/tests/variable_assign_from_constant.eout
index 37b75de..b8fbee6 100644
--- a/ecomp-c/tests/variable_assign_from_constant.eout
+++ b/ecomp-c/tests/variable_assign_from_constant.eout
@@ -1,6 +1,6 @@
format binary
use32
-org 0x1000000
+org $1000000
; CONST N -> integer, 20
; DECL a -> integer
; LET a <- 20
diff --git a/ecomp-c/tests/variable_assign_from_expression.eout b/ecomp-c/tests/variable_assign_from_expression.eout
index eefb735..05bf722 100644
--- a/ecomp-c/tests/variable_assign_from_expression.eout
+++ b/ecomp-c/tests/variable_assign_from_expression.eout
@@ -1,6 +1,6 @@
format binary
use32
-org 0x1000000
+org $1000000
; CONST N -> integer, 20
; DECL a -> integer
; DECL b -> integer
diff --git a/ecomp-c/tests/variable_assign_from_type.eout b/ecomp-c/tests/variable_assign_from_type.eout
index 862f733..3c00c78 100644
--- a/ecomp-c/tests/variable_assign_from_type.eout
+++ b/ecomp-c/tests/variable_assign_from_type.eout
@@ -1,5 +1,5 @@
format binary
use32
-org 0x1000000
+org $1000000
; CONST N -> integer, 20
; DECL a -> integer
diff --git a/ecomp-c/tests/variable_assign_from_variable.eout b/ecomp-c/tests/variable_assign_from_variable.eout
index 9b1c871..3661820 100644
--- a/ecomp-c/tests/variable_assign_from_variable.eout
+++ b/ecomp-c/tests/variable_assign_from_variable.eout
@@ -1,6 +1,6 @@
format binary
use32
-org 0x1000000
+org $1000000
; CONST N -> integer, 20
; DECL a -> integer
; DECL b -> integer
diff --git a/ecomp-c/tests/variable_name_as_type.eout b/ecomp-c/tests/variable_name_as_type.eout
index fcca7f9..ffbfbe0 100644
--- a/ecomp-c/tests/variable_name_as_type.eout
+++ b/ecomp-c/tests/variable_name_as_type.eout
@@ -1,4 +1,4 @@
format binary
use32
-org 0x1000000
+org $1000000
; DECL a -> integer
diff --git a/ecomp-c/tests/variable_not_initialized.eout b/ecomp-c/tests/variable_not_initialized.eout
index 226d99a..665c916 100644
--- a/ecomp-c/tests/variable_not_initialized.eout
+++ b/ecomp-c/tests/variable_not_initialized.eout
@@ -1,5 +1,5 @@
format binary
use32
-org 0x1000000
+org $1000000
; DECL a -> integer
; DECL b -> integer