summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2020-07-25 21:11:58 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2020-07-25 21:11:58 +0200
commitb9adc27dff1de4b062523bd287757e1b82f56e8c (patch)
tree55edef22b9dcd9cbaed9059e1c4898159e2c25bf
parent8f3209b03e3bfdc2f6cb483db104c5ae9aaeb76c (diff)
downloadcompilertests-b9adc27dff1de4b062523bd287757e1b82f56e8c.tar.gz
compilertests-b9adc27dff1de4b062523bd287757e1b82f56e8c.tar.bz2
asm-i386: some work on expression tree evaluations and new opcodes
- added dw, dd and arbitrary length data - added INT nn for syscalls - rearranged the elf.asm test so it only needs the minimal set of opcodes we have - added an assignment for variables to expressions - some work on evaluation of expressions
-rw-r--r--ecomp-c/asm-i386.c228
-rw-r--r--ecomp-c/tests/asm-i386/elf.asm9
2 files changed, 204 insertions, 33 deletions
diff --git a/ecomp-c/asm-i386.c b/ecomp-c/asm-i386.c
index d4e1aab..d1d2f57 100644
--- a/ecomp-c/asm-i386.c
+++ b/ecomp-c/asm-i386.c
@@ -33,6 +33,7 @@
* E8 XX XX XX XX call rel32
* C3 ret
* F4 hlt
+ * CD XX int nnn
* 90 nop
*
* r32/rr
@@ -53,8 +54,9 @@
*
* format binary
* use32
- * org 0x0000
- * dd 0x0000
+ * org 0x00000000
+ * dd 0x00000000 (1 to N arguments), integer only
+ * dw 0x0000 (1 to N arguments), integer only
* db 0x00 (1 to N arguments), combinable with string literals
* db "abcd"
*/
@@ -70,8 +72,8 @@ enum {
};
static int DEBUG_GETCHAR = 0;
-static int DEBUG_SCANNER = 0;
-static int DEBUG_PARSER = 0;
+static int DEBUG_SCANNER = 1;
+static int DEBUG_PARSER = 1;
/* scanner */
@@ -92,6 +94,7 @@ typedef enum {
S_minus,
S_star,
S_slash,
+ S_equals,
S_current_org,
S_current_addr,
S_eof
@@ -114,6 +117,7 @@ static char *symname[S_eof+1] = {
"-",
"*",
"/",
+ "=",
"$$",
"$",
"eof"
@@ -355,7 +359,6 @@ static void skipEolComment( void )
while( look != '\n' ) {
look = getChar( );
}
- look = getChar( );
}
static S_Symbol getSym( void )
@@ -414,17 +417,25 @@ static S_Symbol getSym( void )
s = S_number;
break;
case '-':
+ look = getChar( );
s = S_minus;
break;
case '+':
+ look = getChar( );
s = S_plus;
break;
case '*':
+ look = getChar( );
s = S_star;
break;
case '/':
+ look = getChar( );
s = S_slash;
break;
+ case '=':
+ look = getChar( );
+ s = S_equals;
+ break;
case 'f':
identifier( );
if( strcmp( ident, "format" ) == 0 ) {
@@ -611,6 +622,12 @@ static void Emit_byte( int d )
Emit_char( ( d & 0xFF ) );
}
+static void Emit_word_little_endian( int d )
+{
+ Emit_char( ( d & 0xFF ) );
+ Emit_char( ( d >> 8 ) & 0xFF );
+}
+
static void Emit_double_little_endian( int d )
{
Emit_char( ( d & 0xFF ) );
@@ -711,7 +728,9 @@ static Symbol *parseLabel( void )
typedef enum {
OPCODE_PSEUDO_ORG,
OPCODE_PSEUDO_DD,
+ OPCODE_PSEUDO_DW,
OPCODE_PSEUDO_DB,
+ OPCODE_PSEUDO_ASSIGN,
OPCODE_MOV,
OPCODE_PUSH,
OPCODE_POP,
@@ -727,6 +746,7 @@ typedef enum {
OPCODE_JBE,
OPCODE_JA,
OPCODE_JAE,
+ OPCODE_INT,
OPCODE_HLT,
OPCODE_NOP,
OPCODE_CALL,
@@ -737,7 +757,9 @@ typedef enum {
static char *opcodename[OPCODE_UNKNOWN+1] = {
"org",
"dd",
+ "dw",
"db",
+ "=",
"mov",
"push",
"pop",
@@ -753,6 +775,7 @@ static char *opcodename[OPCODE_UNKNOWN+1] = {
"jbe",
"ja",
"jae",
+ "int",
"hlt",
"nop",
"call",
@@ -848,6 +871,10 @@ static OpcodeInfo *parseOpcode( void )
opcode_info->opcode = OPCODE_PSEUDO_DB;
opcode_info->nof_operands = 1;
opcode_info->size = 1;
+ } else if( strcmp( ident, "dw" ) == 0 ) {
+ opcode_info->opcode = OPCODE_PSEUDO_DW;
+ opcode_info->nof_operands = 1;
+ opcode_info->size = 2;
} else if( strcmp( ident, "div" ) == 0 ) {
opcode_info->opcode = OPCODE_DIV;
opcode_info->nof_operands = 1;
@@ -861,6 +888,13 @@ static OpcodeInfo *parseOpcode( void )
opcode_info->size = 1;
}
break;
+ case 'i':
+ if( strcmp( ident, "int" ) == 0 ) {
+ opcode_info->opcode = OPCODE_INT;
+ opcode_info->nof_operands = 1;
+ opcode_info->size = 2;
+ }
+ break;
case 'j':
switch( ident[1] ) {
case 'm':
@@ -1102,7 +1136,6 @@ static ExpressionNode *parseTerm( void )
ExpressionNode *node, *tmp;
node = parseFactor( );
-
while( sym == S_star || sym == S_slash ) {
tmp = node;
node = create_expression_node( );
@@ -1134,16 +1167,39 @@ static ExpressionNode *parseExpression( void )
return node;
}
-static int evaluateExpression( ExpressionNode *node )
+static int evaluateExpression( ExpressionNode *node, int ORG, int LC )
{
+ int left;
+ int right;
+ int num;
+
switch( node->type ) {
case EXPRESSION_NODE_TYPE_CONST:
- return node->integer_value;
+ if( node->integer_value == ADDRESS_CURRENT_ORG ) {
+ return ORG;
+ } else if( node->integer_value == ADDRESS_CURRENT_ADDR ) {
+ return LC;
+ } else {
+ return node->integer_value;
+ }
+
case EXPRESSION_NODE_TYPE_VAR:
return node->symbol->addr;
- /* TODO */
+
case EXPRESSION_NODE_TYPE_OP:
- Abort( "Const node expression node not implemented yet" );
+ left = evaluateExpression( node->left, ORG, LC );
+ right = evaluateExpression( node->right, ORG, LC );
+ switch( node->op ) {
+ case S_plus:
+ num = left + right;
+ break;
+ case S_minus:
+ num = left - right;
+ break;
+ default:
+ Abort( "Unknown operation '%s' in expression", symname[node->op] );
+ }
+ return num;
}
return 0;
@@ -1175,7 +1231,6 @@ static OperandInfo *parseOperand( OpcodeInfo *opcode_info )
*/
operand_info->type = OPERAND_ABSOLUTE;
operand_info->node = parseExpression( );
- operand_info->num = evaluateExpression( operand_info->node );
} else if( sym == S_string ) {
operand_info->type = OPERAND_ABSOLUTE;
operand_info->str = AllocateAndCopyStr( str );
@@ -1236,7 +1291,10 @@ static void parseOperands( OpcodeInfo *opcode_info )
}
}
- if( opcode_info->opcode != OPCODE_PSEUDO_DB && nof_operands != opcode_info->nof_operands ) {
+ if( opcode_info->opcode != OPCODE_PSEUDO_DB &&
+ opcode_info->opcode != OPCODE_PSEUDO_DW &&
+ opcode_info->opcode != OPCODE_PSEUDO_DD &&
+ nof_operands != opcode_info->nof_operands ) {
Abort( "'%s' expects %d operand(s), %d given", opcodename[opcode_info->opcode], opcode_info->nof_operands, nof_operands );
}
@@ -1244,8 +1302,30 @@ static void parseOperands( OpcodeInfo *opcode_info )
case OPCODE_HLT:
case OPCODE_NOP:
case OPCODE_RET:
- case OPCODE_PSEUDO_DD:
break;
+ case OPCODE_PSEUDO_DD:
+ case OPCODE_PSEUDO_DW: {
+ OperandInfo *operand_info;
+ int i = 0;
+ int size = 0;
+
+ operand_info = opcode_info->operand;
+ while( operand_info != NULL ) {
+ switch( opcode_info->opcode ) {
+ case OPCODE_PSEUDO_DW:
+ size += 2;
+ break;
+ case OPCODE_PSEUDO_DD:
+ size += 4;
+ default:
+ break;
+ }
+ i++;
+ operand_info = operand_info->next;
+ }
+ opcode_info->nof_operands = i;
+ opcode_info->size = size;
+ } break;
case OPCODE_PSEUDO_DB: {
OperandInfo *operand_info;
int i = 0;
@@ -1327,6 +1407,11 @@ static void parseOperands( OpcodeInfo *opcode_info )
Abort( "'%s' expects a jump label", opcodename[opcode_info->opcode] );
}
break;
+ case OPCODE_INT:
+ if( opcode_info->operand->next->type != OPERAND_ABSOLUTE ) {
+ /* int $80, jump to interrupt vector number */
+ }
+ break;
default:
Abort( "Unhandled opcode '%s' when checking operand validity", opcodename[opcode_info->opcode] );
}
@@ -1372,12 +1457,58 @@ static OpcodeInfo *parseOrg( void )
return opcode_info;
}
+static OpcodeInfo *parseAssignment( void )
+{
+ OpcodeInfo *opcode_info;
+ OperandInfo *operand_info;
+ Symbol *symbol;
+
+ opcode_info = Allocate( sizeof( OpcodeInfo ) );
+ opcode_info->addr = 0;
+ opcode_info->opcode = OPCODE_PSEUDO_ASSIGN;
+ opcode_info->operand = NULL;
+ opcode_info->next = NULL;
+ opcode_info->size = 0;
+
+ symbol = get_symbol( ident );
+ if( symbol != NULL && symbol->defined ) {
+ Abort( "Constant '%s' has already been defined", ident );
+ } else {
+ if( symbol == NULL ) {
+ symbol = insert_symbol( ident );
+ }
+ }
+ symbol->defined = 1;
+ sym = getSym( );
+ Expect( S_equals );
+
+ operand_info = Allocate( sizeof( OperandInfo ) );
+ operand_info->next = NULL;
+ operand_info->type = OPERAND_ABSOLUTE;
+ operand_info->num = 0;
+ operand_info->str = NULL;
+ operand_info->node = parseExpression( );
+
+ append_operand( opcode_info, operand_info );
+
+ /* TODO: symbols are only label, adresses for now, so we
+ * abuse it to store constants, we could also do lazy
+ * evaluation */
+ /* TODO: make an assignment node, cannot evaluate here without
+ * current ORG and LC!
+ * symbol->addr = evaluateExpression( node );
+ */
+
+ return opcode_info;
+}
+
Symbol *last_label = NULL;
static OpcodeInfo *parseDirective( void )
{
OpcodeInfo *opcode_info = NULL;
Symbol *symbol;
+ S_Symbol peek;
switch( sym ) {
case S_org:
@@ -1385,7 +1516,8 @@ static OpcodeInfo *parseDirective( void )
break;
case S_ident:
- if( peekSym( ) == S_colon ) {
+ peek = peekSym( );
+ if( peek == S_colon ) {
symbol = parseLabel( );
if( last_label != NULL ) {
symbol->next_label = last_label;
@@ -1396,6 +1528,8 @@ static OpcodeInfo *parseDirective( void )
}
opcode_info = parseOperation( last_label );
last_label = NULL;
+ } else if( peek == S_equals ) {
+ parseAssignment( );
} else {
opcode_info = parseOperation( last_label );
last_label = NULL;
@@ -1477,7 +1611,7 @@ static int relative_distance( OpcodeInfo *opcode_info, int src_addr, int dest_ad
return rel;
}
-static int patchup_addresses( OpcodeInfo *opcode_info )
+static int patchup_addresses( OpcodeInfo *opcode_info, int ORG )
{
OpcodeInfo *opcode = opcode_info;
OperandInfo *operand;
@@ -1544,7 +1678,7 @@ static int patchup_addresses( OpcodeInfo *opcode_info )
if( operand->type == OPERAND_MEMORY_DIRECT || operand->type == OPERAND_MEMORY_INDIRECT ) {
if( operand->addr == ADDRESS_UNDEFINED ) {
if( operand->node != NULL ) {
- operand->addr = evaluateExpression( operand->node );
+ operand->addr = evaluateExpression( operand->node, ORG, opcode->addr );
}
}
}
@@ -1671,23 +1805,26 @@ static void check_for_undefined_labels( Symbol *symbol )
}
}
-static void compute_addresses( OpcodeInfo *opcode_info )
+static int compute_addresses( OpcodeInfo *opcode_info )
{
OpcodeInfo *opcode = opcode_info;
OperandInfo *operand;
+ int ORG = 0;
int LC = 0;
Symbol *label;
while( opcode != NULL ) {
switch( opcode->opcode ) {
case OPCODE_PSEUDO_ORG:
- LC = opcode->operand->num;
+ ORG = opcode->operand->num;
+ LC = ORG;
if( DEBUG_PARSER ) {
- fprintf( stderr, "LC set to $%X\n", LC );
+ fprintf( stderr, "ORG/LC set to $%X\n", LC );
}
break;
case OPCODE_PSEUDO_DD:
+ case OPCODE_PSEUDO_DW:
case OPCODE_PSEUDO_DB:
case OPCODE_MOV:
case OPCODE_PUSH:
@@ -1704,6 +1841,7 @@ static void compute_addresses( OpcodeInfo *opcode_info )
case OPCODE_JBE:
case OPCODE_JA:
case OPCODE_JAE:
+ case OPCODE_INT:
case OPCODE_HLT:
case OPCODE_NOP:
case OPCODE_CALL:
@@ -1723,12 +1861,27 @@ static void compute_addresses( OpcodeInfo *opcode_info )
operand = opcode->operand;
while( operand != NULL ) {
- if( operand->type == OPERAND_MEMORY_DIRECT ||
- operand->type == OPERAND_MEMORY_INDIRECT ) {
- operand->addr = evaluateExpression( operand->node );
- if( DEBUG_PARSER ) {
- fprintf( stderr, "LC=$%X assigned to operand at $%X\n", LC, operand->addr );
- }
+ switch( operand->type ) {
+ case OPERAND_ABSOLUTE:
+ if( operand->node != NULL ) {
+ operand->num = evaluateExpression( operand->node, ORG, LC );
+ }
+ break;
+ case OPERAND_MEMORY_DIRECT:
+ case OPERAND_MEMORY_INDIRECT:
+ if( operand->node != NULL ) {
+ operand->addr = evaluateExpression( operand->node, ORG, LC );
+ }
+ if( DEBUG_PARSER ) {
+ fprintf( stderr, "LC=$%X assigned to operand at $%X\n", LC, operand->addr );
+ }
+ break;
+ case OPERAND_REGISTER:
+ case OPERAND_REGISTER_INDIRECT:
+ break;
+ default:
+ Abort( "Unhandled case when computing operands from expressions" );
+
}
operand = operand->next;
}
@@ -1739,15 +1892,18 @@ static void compute_addresses( OpcodeInfo *opcode_info )
}
opcode = opcode->next;
}
+
+ return ORG;
}
static void generate_code( OpcodeInfo *opcode_info )
{
int has_to_relocate = 0;
int pass = 0;
+ int ORG = 0;
do {
- compute_addresses( opcode_info );
+ ORG = compute_addresses( opcode_info );
pass++;
if( pass > MAX_PASSES ) {
Abort( "Too many passes" );
@@ -1760,7 +1916,7 @@ static void generate_code( OpcodeInfo *opcode_info )
print_labels( symbol );
}
check_for_undefined_labels( symbol );
- has_to_relocate = patchup_addresses( opcode_info );
+ has_to_relocate = patchup_addresses( opcode_info, ORG );
} while( has_to_relocate );
compute_addresses( opcode_info );
@@ -1784,7 +1940,18 @@ static void emit_opcode( OpcodeInfo *opcode_info )
case OPCODE_PSEUDO_ORG:
break;
case OPCODE_PSEUDO_DD:
- Emit_double_little_endian( opcode_info->operand->num );
+ operand = opcode_info->operand;
+ while( operand != NULL ) {
+ Emit_double_little_endian( opcode_info->operand->num );
+ operand = operand->next;
+ }
+ break;
+ case OPCODE_PSEUDO_DW:
+ operand = opcode_info->operand;
+ while( operand != NULL ) {
+ Emit_word_little_endian( operand->num );
+ operand = operand->next;
+ }
break;
case OPCODE_PSEUDO_DB:
operand = opcode_info->operand;
@@ -1955,6 +2122,11 @@ static void emit_opcode( OpcodeInfo *opcode_info )
Emit( "%c%c", 0x73, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->addr ) );
}
break;
+ case OPCODE_INT:
+ if( opcode_info->operand->type == OPERAND_ABSOLUTE ) {
+ Emit( "%c%c", 0xCD, opcode_info->operand->num );
+ }
+ break;
case OPCODE_CALL:
if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) {
int rel = relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->addr );
diff --git a/ecomp-c/tests/asm-i386/elf.asm b/ecomp-c/tests/asm-i386/elf.asm
index 3fba61e..d5278ff 100644
--- a/ecomp-c/tests/asm-i386/elf.asm
+++ b/ecomp-c/tests/asm-i386/elf.asm
@@ -2,9 +2,6 @@ format binary
use32
org $08048000
ehdr:
-mov eax, $$
-mov ebx, $
-mov ecx, $1
db $7F, "ELF" ; e_ident: magic
db 1 ; EI_CLASS: ELFCLASS32
db 1 ; EI_BYTE: ELFDATA2LSB (little endian, 2's complement)
@@ -33,13 +30,15 @@ dd $$ ; p_paddr
dd filesize ; p_filesz
dd filesize ; p_memsz
dd 7 ; p_flags: Read, Write & Execute
-dd 0x1000 ; p_align
+dd $1000 ; p_align
phdrsize = $ - phdr
_start:
mov eax, 42
mov [a], eax
+mov eax, [a]
+push eax
+pop ebx
mov eax, 1
-mov ebx, [a]
int $80
a: dd 43
filesize = $ - $$