diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2017-01-01 19:31:12 +0100 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2017-01-01 19:31:12 +0100 |
commit | 69fe7b182a1eedfb75c611f7dd35fa60200426f4 (patch) | |
tree | 329a0c6cc9b06c23d8782ece09f0f7dfa9b16b13 /miniasm/parse.c | |
download | compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.gz compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.bz2 |
initial checkin
Diffstat (limited to 'miniasm/parse.c')
-rw-r--r-- | miniasm/parse.c | 543 |
1 files changed, 543 insertions, 0 deletions
diff --git a/miniasm/parse.c b/miniasm/parse.c new file mode 100644 index 0000000..2a1c423 --- /dev/null +++ b/miniasm/parse.c @@ -0,0 +1,543 @@ +#include "const.h" +#include "hash.h" +#include "opcodes.h" +#include "optable.h" +#include "scan.h" +#include "parse.h" +#include "string.h" +#include "io.h" +#include "utils.h" +#include "minilib.h" +#include "arena.h" + +void parser_init( Parser *p, Scanner *s ) +{ + p->LC = 0; + p->s = s; + inthash_init( &p->labels, INIT_NOF_LABELS ); + p->state = PARSE_LABEL_OR_OPCODE; + p->opcodeInfo = 0; + p->nof_operands = 0; + p->code = (char *)allocate( DEFAULT_MEMORY_SIZE ); + p->last_opcode = 0; +} + +void parser_done( Parser *p ) +{ + deallocate( (void *)&p->code ); + inthash_done( &p->labels ); +} + +void parser_debug( Parser *p, int enable ) +{ + p->debug = enable; + + if( p->debug ) { + print( "PARSER DEBUGGING ENABLED" ); + } +} + +static OpcodeInfo *handle_opcode( Parser *p, char *mnemonic ) +{ + OpcodeInfo *opcode; + + opcode = lookup_opcode( mnemonic ); + if( !opcode ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: ILLEGAL OPCODE '" ); + strcat( buf, mnemonic ); + strcat( buf, "'" ); + print( buf ); + return 0; + } + + return opcode; +} + +static void remember_label( Parser *p, char *label ) +{ + int LC; + + LC = inthash_get( &p->labels, label ); + + if( LC == -1 ) { + inthash_set( &p->labels, strdup( label ), p->LC ); + } else { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: DUPLICATE LABEL '" ); + strcat( buf, label ); + strcat( buf, "'" ); + print( buf ); + } +} + +static int lookup_label( Parser *p, char *label ) +{ + int LC; + + LC = inthash_get( &p->labels, label ); + + return LC; +} + +static void print_operand( Operand *o ) +{ + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + *buf = '\0'; + + switch( o->type ) { + case OPERAND_TYPE_IDENT: + strcat( buf, "OPERAND_IDENT(" ); + strcat( buf, o->data.ident ); + strcat( buf, ")" ); + break; + + case OPERAND_TYPE_INT_CONST: + strcat( buf, "OPERAND_INT_CONST(" ); + inttohex( o->data.int_const, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + break; + } + + print( buf ); +} + +static int is_register( char *s ) +{ + if( ( strcmp( s, "ax" ) == 0 ) || + ( strcmp( s, "bx" ) == 0 ) || + ( strcmp( s, "cx" ) == 0 ) || + ( strcmp( s, "dx" ) == 0 ) ) { + return 1; + } + return 0; +} + +static int register_src_mask( char *s ) +{ + if( strcmp( s, "ax" ) == 0 ) { + return OPCODE_SRC_AX_REG; + } else if( strcmp( s, "bx" ) == 0 ) { + return OPCODE_SRC_BX_REG; + } else if( strcmp( s, "cx" ) == 0 ) { + return OPCODE_SRC_CX_REG; + } else if( strcmp( s, "dx" ) == 0 ) { + return OPCODE_SRC_DX_REG; + } +} + +static int register_dst_mask( char *s ) +{ + if( strcmp( s, "ax" ) == 0 ) { + return OPCODE_DST_AX_REG; + } else if( strcmp( s, "bx" ) == 0 ) { + return OPCODE_DST_BX_REG; + } else if( strcmp( s, "cx" ) == 0 ) { + return OPCODE_DST_CX_REG; + } else if( strcmp( s, "dx" ) == 0 ) { + return OPCODE_DST_DX_REG; + } +} + +static void output_code_jmp( Parser *p ) +{ + int addr; + int opcode; + + if( p->debug ) { + print( "ENCODING JMP" ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( p->pass == 2 ) { + addr = lookup_label( p, p->operand[0].data.ident ); + if( addr == -1 ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: ILLEGAL LABEL IN JUMP '" ); + strcat( buf, p->operand[0].data.ident ); + strcat( buf, "'" ); + print( buf ); + return; + } + p->code[p->LC+1] = addr; + } + break; + + case OPERAND_TYPE_INT_CONST: + /* assuming an absolute constant address */ + addr = p->operand[0].data.int_const; + p->code[p->LC+1] = addr; + break; + } + + p->last_opcode = opcode; +} + +static void output_code_not( Parser *p ) +{ + int opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING NOT" ); + print( buf ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_src_mask( p->operand[0].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" ); + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void output_code_stack( Parser *p ) +{ + int opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING NOT" ); + print( buf ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_src_mask( p->operand[0].data.ident ); + } + break; + + default: + print( "ERROR: PUSH AND POP EXCPECT A REGISTER AS OPERAND" ); + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void handle_2op( Parser *p, char *name ) +{ + int opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING " ); + strcat( buf, name ); + print( buf ); + print_operand( &p->operand[0] ); + print_operand( &p->operand[1] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_dst_mask( p->operand[0].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" ); + break; + } + + switch( p->operand[1].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[1].data.ident ) ) { + opcode |= register_src_mask( p->operand[1].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + opcode |= OPCODE_SRC_CONST; + p->code[p->LC+1] = p->operand[1].data.int_const; + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void output_code_mov( Parser *p ) +{ + handle_2op( p, "MOV" ); +} + +static void output_code_or( Parser *p ) +{ + handle_2op( p, "OR" ); +} + +static void output_code_and( Parser *p ) +{ + handle_2op( p, "AND" ); +} + +static void output_code_add( Parser *p ) +{ + handle_2op( p, "ADD" ); +} + +static void output_code_cmp( Parser *p ) +{ + handle_2op( p, "CMP" ); +} + +static void output_code_sub( Parser *p ) +{ + handle_2op( p, "SUB" ); +} + +static void output_code( Parser *p ) +{ + p->code[p->LC] = p->opcodeInfo->opcode; + + if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_MOV_MASK ) { + output_code_mov( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_OR ) { + output_code_or( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_AND ) { + output_code_and( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_CMP ) { + output_code_cmp( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_SUB ) { + output_code_sub( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_ADD ) { + output_code_add( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) { + output_code_jmp( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_NOT ) { + output_code_not( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_STACK ) { + output_code_stack( p ); + } else if( ( ~p->opcodeInfo->opcode & OPCODE_ZERO_OPS_MASK ) == OPCODE_ZERO_OPS_MASK ) { + /* done above */ + } else { + print( "ERROR: ILLEGAL OUTPUT CODE" ); + } +} + +static int compute_codesize( Parser *p ) +{ + if( p->last_opcode & OPCODE_GROUP_TWO_OPERANDS ) { + if( ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_CONST || + ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_MEMORY ) { + print( "CODESIZE 2" ); + return 2; + } else { + print( "CODESIZE 1" ); + return 1; + } + } else if( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) { + if( ( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) { + print( "CODESIZE 2" ); + return 2; + } else { + print( "CODESIZE 1" ); + return 1; + } + } else { + print( "CODESIZE 1" ); + return 1; + } +} + +static void pass( Parser *p, int pass ) +{ + Symbol s; + + p->pass = pass; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + inttohex( pass, buf2 ); + *buf = '\0'; + strcat( buf, "PASS " ); + strcat( buf, buf2 ); + print( buf ); + } + + s = scanner_scan( p->s ); + + while( s.sym != S_eof ) { + if( s.sym == S_comment ) { + scanner_skip_line( p->s ); + if( p->debug ) { + print( "PARSER(COMMENT)" ); + } + p->state = PARSE_LABEL_OR_OPCODE; + } else if( s.sym == S_label ) { + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "LABEL(" ); + strcat( buf, s.data.s ); + strcat( buf, ")" ); + print( buf ); + } + if( pass == 1 ) { + remember_label( p, s.data.s ); + } + p->state = PARSE_OPCODE; + } else if( s.sym == S_ident ) { + char *ident = s.data.s; + char buf[MAX_TMP_LEN]; + + switch( p->state ) { + case PARSE_LABEL_OR_OPCODE: + case PARSE_OPCODE: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPCODE(" ); + strcat( buf, ident ); + strcat( buf, ")" ); + print( buf ); + } + p->opcodeInfo = handle_opcode( p, ident ); + if( p->opcodeInfo ) { + if( p->opcodeInfo->operands > 0 ) { + p->state = PARSE_OPERAND; + p->nof_operands = 0; + } + } + break; + + case PARSE_OPERAND: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPERAND(" ); + strcat( buf, ident ); + strcat( buf, ")" ); + print( buf ); + } + p->operand[p->nof_operands].type = OPERAND_TYPE_IDENT; + strcpy( p->operand[p->nof_operands].data.ident, ident ); + p->nof_operands++; + if( p->nof_operands >= p->opcodeInfo->operands ) { + p->state = PARSE_LABEL_OR_OPCODE; + p->nof_operands = 0; + } + break; + + default: + print( "ERROR: ILLEGAL PARSE STATE" ); + halt( ); + } + } else if( s.sym == S_number ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + int int_const = s.data.n; + + switch( p->state ) { + case PARSE_LABEL_OR_OPCODE: + case PARSE_OPCODE: + print( "ERROR: EXPECTED IDENTIFIER FOR LABEL OR OPCODE" ); + p->state = PARSE_OPERAND; + p->nof_operands = 0; + break; + + case PARSE_OPERAND: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPERAND(" ); + inttohex( int_const, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + print( buf ); + } + p->operand[p->nof_operands].type = OPERAND_TYPE_INT_CONST; + p->operand[p->nof_operands].data.int_const = int_const; + p->nof_operands++; + if( p->nof_operands >= p->opcodeInfo->operands ) { + p->state = PARSE_LABEL_OR_OPCODE; + p->nof_operands = 0; + } + break; + + default: + print( "ERROR: ILLEGAL PARSE STATE" ); + halt( ); + } + } else if( s.sym == S_newline ) { + if( p->opcodeInfo ) { + output_code( p ); + p->LC += compute_codesize( p ); + } + p->state = PARSE_LABEL_OR_OPCODE; + p->opcodeInfo = 0; + p->nof_operands = 0; + } + s = scanner_scan( p->s ); + } +} + +static void print_labels( Parser *p ) +{ + intHashIterator it; + intHashEntry *entry; + + print( "LABELS:" ); + entry = inthash_getfirst( &p->labels, &it ); + while( entry ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, " " ); + strcat( buf, entry->key ); + strcat( buf, " = " ); + inttohex( entry->value, buf2 ); + strcat( buf, buf2 ); + print( buf ); + entry = inthash_getnext( &it ); + } +} + +void parser_parse( Parser *p ) +{ + opcode_table_init( ); + + pass( p, 1 ); + + if( p->debug ) { + print_labels( p ); + } + + scanner_reset( p->s ); + p->LC = 0; + pass( p, 2 ); + + opcode_table_done( ); +} |