From b9adc27dff1de4b062523bd287757e1b82f56e8c Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sat, 25 Jul 2020 21:11:58 +0200 Subject: asm-i386: some work on expression tree evaluations and new opcodes - added dw, dd and arbitrary length data - added INT nn for syscalls - rearranged the elf.asm test so it only needs the minimal set of opcodes we have - added an assignment for variables to expressions - some work on evaluation of expressions --- ecomp-c/asm-i386.c | 228 ++++++++++++++++++++++++++++++++++++----- ecomp-c/tests/asm-i386/elf.asm | 9 +- 2 files changed, 204 insertions(+), 33 deletions(-) diff --git a/ecomp-c/asm-i386.c b/ecomp-c/asm-i386.c index d4e1aab..d1d2f57 100644 --- a/ecomp-c/asm-i386.c +++ b/ecomp-c/asm-i386.c @@ -33,6 +33,7 @@ * E8 XX XX XX XX call rel32 * C3 ret * F4 hlt + * CD XX int nnn * 90 nop * * r32/rr @@ -53,8 +54,9 @@ * * format binary * use32 - * org 0x0000 - * dd 0x0000 + * org 0x00000000 + * dd 0x00000000 (1 to N arguments), integer only + * dw 0x0000 (1 to N arguments), integer only * db 0x00 (1 to N arguments), combinable with string literals * db "abcd" */ @@ -70,8 +72,8 @@ enum { }; static int DEBUG_GETCHAR = 0; -static int DEBUG_SCANNER = 0; -static int DEBUG_PARSER = 0; +static int DEBUG_SCANNER = 1; +static int DEBUG_PARSER = 1; /* scanner */ @@ -92,6 +94,7 @@ typedef enum { S_minus, S_star, S_slash, + S_equals, S_current_org, S_current_addr, S_eof @@ -114,6 +117,7 @@ static char *symname[S_eof+1] = { "-", "*", "/", + "=", "$$", "$", "eof" @@ -355,7 +359,6 @@ static void skipEolComment( void ) while( look != '\n' ) { look = getChar( ); } - look = getChar( ); } static S_Symbol getSym( void ) @@ -414,17 +417,25 @@ static S_Symbol getSym( void ) s = S_number; break; case '-': + look = getChar( ); s = S_minus; break; case '+': + look = getChar( ); s = S_plus; break; case '*': + look = getChar( ); s = S_star; break; case '/': + look = getChar( ); s = S_slash; break; + case '=': + look = getChar( ); + s = S_equals; + break; case 
'f': identifier( ); if( strcmp( ident, "format" ) == 0 ) { @@ -611,6 +622,12 @@ static void Emit_byte( int d ) Emit_char( ( d & 0xFF ) ); } +static void Emit_word_little_endian( int d ) +{ + Emit_char( ( d & 0xFF ) ); + Emit_char( ( d >> 8 ) & 0xFF ); +} + static void Emit_double_little_endian( int d ) { Emit_char( ( d & 0xFF ) ); @@ -711,7 +728,9 @@ static Symbol *parseLabel( void ) typedef enum { OPCODE_PSEUDO_ORG, OPCODE_PSEUDO_DD, + OPCODE_PSEUDO_DW, OPCODE_PSEUDO_DB, + OPCODE_PSEUDO_ASSIGN, OPCODE_MOV, OPCODE_PUSH, OPCODE_POP, @@ -727,6 +746,7 @@ typedef enum { OPCODE_JBE, OPCODE_JA, OPCODE_JAE, + OPCODE_INT, OPCODE_HLT, OPCODE_NOP, OPCODE_CALL, @@ -737,7 +757,9 @@ typedef enum { static char *opcodename[OPCODE_UNKNOWN+1] = { "org", "dd", + "dw", "db", + "=", "mov", "push", "pop", @@ -753,6 +775,7 @@ static char *opcodename[OPCODE_UNKNOWN+1] = { "jbe", "ja", "jae", + "int", "hlt", "nop", "call", @@ -848,6 +871,10 @@ static OpcodeInfo *parseOpcode( void ) opcode_info->opcode = OPCODE_PSEUDO_DB; opcode_info->nof_operands = 1; opcode_info->size = 1; + } else if( strcmp( ident, "dw" ) == 0 ) { + opcode_info->opcode = OPCODE_PSEUDO_DW; + opcode_info->nof_operands = 1; + opcode_info->size = 2; } else if( strcmp( ident, "div" ) == 0 ) { opcode_info->opcode = OPCODE_DIV; opcode_info->nof_operands = 1; @@ -861,6 +888,13 @@ static OpcodeInfo *parseOpcode( void ) opcode_info->size = 1; } break; + case 'i': + if( strcmp( ident, "int" ) == 0 ) { + opcode_info->opcode = OPCODE_INT; + opcode_info->nof_operands = 1; + opcode_info->size = 2; + } + break; case 'j': switch( ident[1] ) { case 'm': @@ -1102,7 +1136,6 @@ static ExpressionNode *parseTerm( void ) ExpressionNode *node, *tmp; node = parseFactor( ); - while( sym == S_star || sym == S_slash ) { tmp = node; node = create_expression_node( ); @@ -1134,16 +1167,39 @@ static ExpressionNode *parseExpression( void ) return node; } -static int evaluateExpression( ExpressionNode *node ) +static int evaluateExpression( 
ExpressionNode *node, int ORG, int LC ) { + int left; + int right; + int num; + switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: - return node->integer_value; + if( node->integer_value == ADDRESS_CURRENT_ORG ) { + return ORG; + } else if( node->integer_value == ADDRESS_CURRENT_ADDR ) { + return LC; + } else { + return node->integer_value; + } + case EXPRESSION_NODE_TYPE_VAR: return node->symbol->addr; - /* TODO */ + case EXPRESSION_NODE_TYPE_OP: - Abort( "Const node expression node not implemented yet" ); + left = evaluateExpression( node->left, ORG, LC ); + right = evaluateExpression( node->right, ORG, LC ); + switch( node->op ) { + case S_plus: + num = left + right; + break; + case S_minus: + num = left - right; + break; + default: + Abort( "Unknown operation '%s' in expression", symname[node->op] ); + } + return num; } return 0; @@ -1175,7 +1231,6 @@ static OperandInfo *parseOperand( OpcodeInfo *opcode_info ) */ operand_info->type = OPERAND_ABSOLUTE; operand_info->node = parseExpression( ); - operand_info->num = evaluateExpression( operand_info->node ); } else if( sym == S_string ) { operand_info->type = OPERAND_ABSOLUTE; operand_info->str = AllocateAndCopyStr( str ); @@ -1236,7 +1291,10 @@ static void parseOperands( OpcodeInfo *opcode_info ) } } - if( opcode_info->opcode != OPCODE_PSEUDO_DB && nof_operands != opcode_info->nof_operands ) { + if( opcode_info->opcode != OPCODE_PSEUDO_DB && + opcode_info->opcode != OPCODE_PSEUDO_DW && + opcode_info->opcode != OPCODE_PSEUDO_DD && + nof_operands != opcode_info->nof_operands ) { Abort( "'%s' expects %d operand(s), %d given", opcodename[opcode_info->opcode], opcode_info->nof_operands, nof_operands ); } @@ -1244,8 +1302,30 @@ static void parseOperands( OpcodeInfo *opcode_info ) case OPCODE_HLT: case OPCODE_NOP: case OPCODE_RET: - case OPCODE_PSEUDO_DD: break; + case OPCODE_PSEUDO_DD: + case OPCODE_PSEUDO_DW: { + OperandInfo *operand_info; + int i = 0; + int size = 0; + + operand_info = opcode_info->operand; + 
while( operand_info != NULL ) { + switch( opcode_info->opcode ) { + case OPCODE_PSEUDO_DW: + size += 2; + break; + case OPCODE_PSEUDO_DD: + size += 4; + default: + break; + } + i++; + operand_info = operand_info->next; + } + opcode_info->nof_operands = i; + opcode_info->size = size; + } break; case OPCODE_PSEUDO_DB: { OperandInfo *operand_info; int i = 0; @@ -1327,6 +1407,11 @@ static void parseOperands( OpcodeInfo *opcode_info ) Abort( "'%s' expects a jump label", opcodename[opcode_info->opcode] ); } break; + case OPCODE_INT: + if( opcode_info->operand->next->type != OPERAND_ABSOLUTE ) { + /* int $80, jump to interrupt vector number */ + } + break; default: Abort( "Unhandled opcode '%s' when checking operand validity", opcodename[opcode_info->opcode] ); } @@ -1372,12 +1457,58 @@ static OpcodeInfo *parseOrg( void ) return opcode_info; } +static OpcodeInfo *parseAssignment( void ) +{ + OpcodeInfo *opcode_info; + OperandInfo *operand_info; + Symbol *symbol; + + opcode_info = Allocate( sizeof( OpcodeInfo ) ); + opcode_info->addr = 0; + opcode_info->opcode = OPCODE_PSEUDO_ASSIGN; + opcode_info->operand = NULL; + opcode_info->next = NULL; + opcode_info->size = 0; + + symbol = get_symbol( ident ); + if( symbol != NULL && symbol->defined ) { + Abort( "Constant '%s' has already been defined", ident ); + } else { + if( symbol == NULL ) { + symbol = insert_symbol( ident ); + } + } + symbol->defined = 1; + sym = getSym( ); + Expect( S_equals ); + + operand_info = Allocate( sizeof( OperandInfo ) ); + operand_info->next = NULL; + operand_info->type = OPERAND_ABSOLUTE; + operand_info->num = 0; + operand_info->str = NULL; + operand_info->node = parseExpression( ); + + append_operand( opcode_info, operand_info ); + + /* TODO: symbols are only label, adresses for now, so we + * abuse it to store constants, we could also do lazy + * evaluation */ + /* TODO: make an assignment node, cannot evaluate here without + * current ORG and LC! 
+ * symbol->addr = evaluateExpression( node ); + */ + + return opcode_info; +} + Symbol *last_label = NULL; static OpcodeInfo *parseDirective( void ) { OpcodeInfo *opcode_info = NULL; Symbol *symbol; + S_Symbol peek; switch( sym ) { case S_org: @@ -1385,7 +1516,8 @@ static OpcodeInfo *parseDirective( void ) break; case S_ident: - if( peekSym( ) == S_colon ) { + peek = peekSym( ); + if( peek == S_colon ) { symbol = parseLabel( ); if( last_label != NULL ) { symbol->next_label = last_label; @@ -1396,6 +1528,8 @@ static OpcodeInfo *parseDirective( void ) } opcode_info = parseOperation( last_label ); last_label = NULL; + } else if( peek == S_equals ) { + parseAssignment( ); } else { opcode_info = parseOperation( last_label ); last_label = NULL; @@ -1477,7 +1611,7 @@ static int relative_distance( OpcodeInfo *opcode_info, int src_addr, int dest_ad return rel; } -static int patchup_addresses( OpcodeInfo *opcode_info ) +static int patchup_addresses( OpcodeInfo *opcode_info, int ORG ) { OpcodeInfo *opcode = opcode_info; OperandInfo *operand; @@ -1544,7 +1678,7 @@ static int patchup_addresses( OpcodeInfo *opcode_info ) if( operand->type == OPERAND_MEMORY_DIRECT || operand->type == OPERAND_MEMORY_INDIRECT ) { if( operand->addr == ADDRESS_UNDEFINED ) { if( operand->node != NULL ) { - operand->addr = evaluateExpression( operand->node ); + operand->addr = evaluateExpression( operand->node, ORG, opcode->addr ); } } } @@ -1671,23 +1805,26 @@ static void check_for_undefined_labels( Symbol *symbol ) } } -static void compute_addresses( OpcodeInfo *opcode_info ) +static int compute_addresses( OpcodeInfo *opcode_info ) { OpcodeInfo *opcode = opcode_info; OperandInfo *operand; + int ORG = 0; int LC = 0; Symbol *label; while( opcode != NULL ) { switch( opcode->opcode ) { case OPCODE_PSEUDO_ORG: - LC = opcode->operand->num; + ORG = opcode->operand->num; + LC = ORG; if( DEBUG_PARSER ) { - fprintf( stderr, "LC set to $%X\n", LC ); + fprintf( stderr, "ORG/LC set to $%X\n", LC ); } break; case 
OPCODE_PSEUDO_DD: + case OPCODE_PSEUDO_DW: case OPCODE_PSEUDO_DB: case OPCODE_MOV: case OPCODE_PUSH: @@ -1704,6 +1841,7 @@ static void compute_addresses( OpcodeInfo *opcode_info ) case OPCODE_JBE: case OPCODE_JA: case OPCODE_JAE: + case OPCODE_INT: case OPCODE_HLT: case OPCODE_NOP: case OPCODE_CALL: @@ -1723,12 +1861,27 @@ static void compute_addresses( OpcodeInfo *opcode_info ) operand = opcode->operand; while( operand != NULL ) { - if( operand->type == OPERAND_MEMORY_DIRECT || - operand->type == OPERAND_MEMORY_INDIRECT ) { - operand->addr = evaluateExpression( operand->node ); - if( DEBUG_PARSER ) { - fprintf( stderr, "LC=$%X assigned to operand at $%X\n", LC, operand->addr ); - } + switch( operand->type ) { + case OPERAND_ABSOLUTE: + if( operand->node != NULL ) { + operand->num = evaluateExpression( operand->node, ORG, LC ); + } + break; + case OPERAND_MEMORY_DIRECT: + case OPERAND_MEMORY_INDIRECT: + if( operand->node != NULL ) { + operand->addr = evaluateExpression( operand->node, ORG, LC ); + } + if( DEBUG_PARSER ) { + fprintf( stderr, "LC=$%X assigned to operand at $%X\n", LC, operand->addr ); + } + break; + case OPERAND_REGISTER: + case OPERAND_REGISTER_INDIRECT: + break; + default: + Abort( "Unhandled case when computing operands from expressions" ); + } operand = operand->next; } @@ -1739,15 +1892,18 @@ static void compute_addresses( OpcodeInfo *opcode_info ) } opcode = opcode->next; } + + return ORG; } static void generate_code( OpcodeInfo *opcode_info ) { int has_to_relocate = 0; int pass = 0; + int ORG = 0; do { - compute_addresses( opcode_info ); + ORG = compute_addresses( opcode_info ); pass++; if( pass > MAX_PASSES ) { Abort( "Too many passes" ); @@ -1760,7 +1916,7 @@ static void generate_code( OpcodeInfo *opcode_info ) print_labels( symbol ); } check_for_undefined_labels( symbol ); - has_to_relocate = patchup_addresses( opcode_info ); + has_to_relocate = patchup_addresses( opcode_info, ORG ); } while( has_to_relocate ); compute_addresses( 
opcode_info ); @@ -1784,7 +1940,18 @@ static void emit_opcode( OpcodeInfo *opcode_info ) case OPCODE_PSEUDO_ORG: break; case OPCODE_PSEUDO_DD: - Emit_double_little_endian( opcode_info->operand->num ); + operand = opcode_info->operand; + while( operand != NULL ) { + Emit_double_little_endian( opcode_info->operand->num ); + operand = operand->next; + } + break; + case OPCODE_PSEUDO_DW: + operand = opcode_info->operand; + while( operand != NULL ) { + Emit_word_little_endian( operand->num ); + operand = operand->next; + } break; case OPCODE_PSEUDO_DB: operand = opcode_info->operand; @@ -1955,6 +2122,11 @@ static void emit_opcode( OpcodeInfo *opcode_info ) Emit( "%c%c", 0x73, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->addr ) ); } break; + case OPCODE_INT: + if( opcode_info->operand->type == OPERAND_ABSOLUTE ) { + Emit( "%c%c", 0xCD, opcode_info->operand->num ); + } + break; case OPCODE_CALL: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { int rel = relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->addr ); diff --git a/ecomp-c/tests/asm-i386/elf.asm b/ecomp-c/tests/asm-i386/elf.asm index 3fba61e..d5278ff 100644 --- a/ecomp-c/tests/asm-i386/elf.asm +++ b/ecomp-c/tests/asm-i386/elf.asm @@ -2,9 +2,6 @@ format binary use32 org $08048000 ehdr: -mov eax, $$ -mov ebx, $ -mov ecx, $1 db $7F, "ELF" ; e_ident: magic db 1 ; EI_CLASS: ELFCLASS32 db 1 ; EI_BYTE: ELFDATA2LSB (little endian, 2's complement) @@ -33,13 +30,15 @@ dd $$ ; p_paddr dd filesize ; p_filesz dd filesize ; p_memsz dd 7 ; p_flags: Read, Write & Execute -dd 0x1000 ; p_align +dd $1000 ; p_align phdrsize = $ - phdr _start: mov eax, 42 mov [a], eax +mov eax, [a] +push eax +pop ebx mov eax, 1 -mov ebx, [a] int $80 a: dd 43 filesize = $ - $$ -- cgit v1.2.3-54-g00ecf