summaryrefslogtreecommitdiff
path: root/miniasm/parse.c
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2017-01-01 19:31:12 +0100
committer Andreas Baumann <mail@andreasbaumann.cc> 2017-01-01 19:31:12 +0100
commit 69fe7b182a1eedfb75c611f7dd35fa60200426f4 (patch)
tree 329a0c6cc9b06c23d8782ece09f0f7dfa9b16b13 /miniasm/parse.c
downloadcompilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.gz
compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.bz2
initial checkin
Diffstat (limited to 'miniasm/parse.c')
-rw-r--r-- miniasm/parse.c 543
1 files changed, 543 insertions, 0 deletions
diff --git a/miniasm/parse.c b/miniasm/parse.c
new file mode 100644
index 0000000..2a1c423
--- /dev/null
+++ b/miniasm/parse.c
@@ -0,0 +1,543 @@
+#include "const.h"
+#include "hash.h"
+#include "opcodes.h"
+#include "optable.h"
+#include "scan.h"
+#include "parse.h"
+#include "string.h"
+#include "io.h"
+#include "utils.h"
+#include "minilib.h"
+#include "arena.h"
+
+/*
+ * Prepare parser `p` to assemble the symbols delivered by scanner `s`.
+ *
+ * Every field that the parser reads later receives a defined value here.
+ * That includes `debug` and `pass`: pass() tests p->debug even when
+ * parser_debug() was never called, so the flag must not be left
+ * indeterminate. Debugging therefore starts disabled and is switched on
+ * with parser_debug() after initialisation.
+ *
+ * The label table and the code buffer set up here are released again by
+ * parser_done().
+ */
+void parser_init( Parser *p, Scanner *s )
+{
+    p->s = s;
+    p->LC = 0;
+    p->pass = 0;
+    p->debug = 0;
+    p->state = PARSE_LABEL_OR_OPCODE;
+    p->opcodeInfo = 0;
+    p->nof_operands = 0;
+    p->last_opcode = 0;
+    inthash_init( &p->labels, INIT_NOF_LABELS );
+    p->code = (char *)allocate( DEFAULT_MEMORY_SIZE );
+}
+
+/*
+ * Release what parser_init() set up: first the code buffer, then the
+ * label table. deallocate() is handed the address of the code pointer.
+ */
+void parser_done( Parser *p )
+{
+    void *code_slot = (void *)&p->code;
+
+    deallocate( code_slot );
+    inthash_done( &p->labels );
+}
+
+/*
+ * Store the debug flag in the parser. A non-zero `enable` additionally
+ * prints a one-line notice that parser debugging is active.
+ */
+void parser_debug( Parser *p, int enable )
+{
+    p->debug = enable;
+
+    if( !enable ) {
+        return;
+    }
+
+    print( "PARSER DEBUGGING ENABLED" );
+}
+
+/*
+ * Look up `mnemonic` in the opcode table.
+ *
+ * Returns the table entry, or 0 after printing an "ILLEGAL OPCODE" error
+ * when the mnemonic is unknown. The parser argument is not used.
+ */
+static OpcodeInfo *handle_opcode( Parser *p, char *mnemonic )
+{
+    OpcodeInfo *info = lookup_opcode( mnemonic );
+
+    if( info ) {
+        return info;
+    }
+
+    {
+        char msg[MAX_TMP_LEN];
+
+        msg[0] = '\0';
+        strcat( msg, "ERROR: ILLEGAL OPCODE '" );
+        strcat( msg, mnemonic );
+        strcat( msg, "'" );
+        print( msg );
+    }
+
+    return 0;
+}
+
+/*
+ * Record `label` at the current location counter.
+ *
+ * A lookup result of -1 is taken to mean "not yet defined"; in that case
+ * a strdup() copy of the name is stored together with p->LC. Any other
+ * lookup result is reported as a duplicate label and nothing is stored.
+ */
+static void remember_label( Parser *p, char *label )
+{
+    int known = inthash_get( &p->labels, label );
+
+    if( known != -1 ) {
+        char msg[MAX_TMP_LEN];
+
+        msg[0] = '\0';
+        strcat( msg, "ERROR: DUPLICATE LABEL '" );
+        strcat( msg, label );
+        strcat( msg, "'" );
+        print( msg );
+        return;
+    }
+
+    inthash_set( &p->labels, strdup( label ), p->LC );
+}
+
+/*
+ * Return the value stored for `label` in the label table, exactly as
+ * inthash_get() reports it (callers treat -1 as "unknown label").
+ */
+static int lookup_label( Parser *p, char *label )
+{
+    return inthash_get( &p->labels, label );
+}
+
+/*
+ * Print a one-line description of operand `o`:
+ * "OPERAND_IDENT(<name>)" for identifiers and
+ * "OPERAND_INT_CONST(<hex>)" for integer constants.
+ * For any other operand type an empty line is handed to print().
+ */
+static void print_operand( Operand *o )
+{
+    char text[MAX_TMP_LEN];
+    char hex[MAX_TMP_LEN];
+
+    text[0] = '\0';
+
+    if( o->type == OPERAND_TYPE_IDENT ) {
+        strcat( text, "OPERAND_IDENT(" );
+        strcat( text, o->data.ident );
+        strcat( text, ")" );
+    } else if( o->type == OPERAND_TYPE_INT_CONST ) {
+        strcat( text, "OPERAND_INT_CONST(" );
+        inttohex( o->data.int_const, hex );
+        strcat( text, hex );
+        strcat( text, ")" );
+    }
+
+    print( text );
+}
+
+/*
+ * Return 1 when `s` is exactly one of the register names "ax", "bx",
+ * "cx" or "dx" (case-sensitive), otherwise 0.
+ */
+static int is_register( char *s )
+{
+    static char *const names[] = { "ax", "bx", "cx", "dx" };
+    int i;
+
+    for( i = 0; i < (int)( sizeof( names ) / sizeof( names[0] ) ); i++ ) {
+        if( strcmp( s, names[i] ) == 0 ) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Map a register name to the opcode bits that select it as the source
+ * operand.
+ *
+ * Returns OPCODE_SRC_{AX,BX,CX,DX}_REG for "ax", "bx", "cx", "dx".
+ * For any other string 0 is returned, so that OR-ing the result into an
+ * opcode adds no bits; previously control fell off the end of the
+ * function, which is undefined behaviour when the caller uses the value.
+ */
+static int register_src_mask( char *s )
+{
+    if( strcmp( s, "ax" ) == 0 ) {
+        return OPCODE_SRC_AX_REG;
+    }
+    if( strcmp( s, "bx" ) == 0 ) {
+        return OPCODE_SRC_BX_REG;
+    }
+    if( strcmp( s, "cx" ) == 0 ) {
+        return OPCODE_SRC_CX_REG;
+    }
+    if( strcmp( s, "dx" ) == 0 ) {
+        return OPCODE_SRC_DX_REG;
+    }
+
+    /* not a register name */
+    return 0;
+}
+
+/*
+ * Map a register name to the opcode bits that select it as the
+ * destination operand.
+ *
+ * Returns OPCODE_DST_{AX,BX,CX,DX}_REG for "ax", "bx", "cx", "dx".
+ * For any other string 0 is returned, so that OR-ing the result into an
+ * opcode adds no bits; previously control fell off the end of the
+ * function, which is undefined behaviour when the caller uses the value.
+ */
+static int register_dst_mask( char *s )
+{
+    if( strcmp( s, "ax" ) == 0 ) {
+        return OPCODE_DST_AX_REG;
+    }
+    if( strcmp( s, "bx" ) == 0 ) {
+        return OPCODE_DST_BX_REG;
+    }
+    if( strcmp( s, "cx" ) == 0 ) {
+        return OPCODE_DST_CX_REG;
+    }
+    if( strcmp( s, "dx" ) == 0 ) {
+        return OPCODE_DST_DX_REG;
+    }
+
+    /* not a register name */
+    return 0;
+}
+
+/*
+ * Encode the operand of a jump instruction.
+ *
+ * The opcode byte itself has already been stored at p->code[p->LC] by
+ * output_code(); this function fills in the address byte at
+ * p->code[p->LC+1]:
+ *  - identifier operand: resolved through the label table, but only in
+ *    pass 2 (in pass 1 the address byte is left untouched);
+ *  - integer operand: used directly as an absolute address.
+ *
+ * p->last_opcode is recorded before the operand is examined, so that the
+ * "ILLEGAL LABEL" error path still leaves the jump's own opcode behind for
+ * compute_codesize(). Before, the early return skipped that assignment and
+ * the location counter advanced by the size of the previous instruction.
+ */
+static void output_code_jmp( Parser *p )
+{
+    int addr;
+    int opcode;
+
+    if( p->debug ) {
+        print( "ENCODING JMP" );
+        print_operand( &p->operand[0] );
+    }
+
+    opcode = p->opcodeInfo->opcode;
+    p->last_opcode = opcode;
+
+    switch( p->operand[0].type ) {
+        case OPERAND_TYPE_IDENT:
+            if( p->pass == 2 ) {
+                addr = lookup_label( p, p->operand[0].data.ident );
+                if( addr == -1 ) {
+                    char buf[MAX_TMP_LEN];
+                    *buf = '\0';
+                    strcat( buf, "ERROR: ILLEGAL LABEL IN JUMP '" );
+                    strcat( buf, p->operand[0].data.ident );
+                    strcat( buf, "'" );
+                    print( buf );
+                    return;
+                }
+                p->code[p->LC+1] = addr;
+            }
+            break;
+
+        case OPERAND_TYPE_INT_CONST:
+            /* assuming an absolute constant address */
+            addr = p->operand[0].data.int_const;
+            p->code[p->LC+1] = addr;
+            break;
+    }
+}
+
+/*
+ * Encode a NOT instruction.
+ *
+ * A register operand contributes its source-register bits to the opcode;
+ * an integer operand is reported as an error. In every case the resulting
+ * opcode is stored at p->code[p->LC] and remembered in p->last_opcode.
+ */
+static void output_code_not( Parser *p )
+{
+    Operand *arg = &p->operand[0];
+    int encoded = p->opcodeInfo->opcode;
+
+    if( p->debug ) {
+        print( "ENCODING NOT" );
+        print_operand( arg );
+    }
+
+    if( arg->type == OPERAND_TYPE_IDENT ) {
+        if( is_register( arg->data.ident ) ) {
+            encoded |= register_src_mask( arg->data.ident );
+        }
+    } else if( arg->type == OPERAND_TYPE_INT_CONST ) {
+        print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" );
+    }
+
+    p->code[p->LC] = encoded;
+    p->last_opcode = encoded;
+}
+
+/*
+ * Encode a stack instruction (the error text names PUSH and POP).
+ *
+ * The single operand must be a register; its source-register bits are
+ * OR-ed into the opcode. Anything else -- an integer, or an identifier
+ * that is not a register name -- is reported as an error (previously a
+ * non-register identifier was accepted silently). The opcode is stored at
+ * p->code[p->LC] and remembered in p->last_opcode in every case.
+ *
+ * The debug trace used to read "ENCODING NOT", copied from
+ * output_code_not(); it now names this encoder.
+ */
+static void output_code_stack( Parser *p )
+{
+    int opcode;
+
+    if( p->debug ) {
+        print( "ENCODING STACK" );
+        print_operand( &p->operand[0] );
+    }
+
+    opcode = p->opcodeInfo->opcode;
+
+    if( p->operand[0].type == OPERAND_TYPE_IDENT &&
+        is_register( p->operand[0].data.ident ) ) {
+        opcode |= register_src_mask( p->operand[0].data.ident );
+    } else {
+        print( "ERROR: PUSH AND POP EXPECT A REGISTER AS OPERAND" );
+    }
+
+    p->code[p->LC] = opcode;
+
+    p->last_opcode = opcode;
+}
+
+/*
+ * Shared encoder for all two-operand instructions; `name` is only used
+ * for the debug trace.
+ *
+ * Operand 0 is the destination: a register adds its destination bits, an
+ * integer is reported as an error. Operand 1 is the source: a register
+ * adds its source bits, an integer sets OPCODE_SRC_CONST and is stored in
+ * the byte following the opcode. The opcode is written to p->code[p->LC]
+ * and remembered in p->last_opcode.
+ */
+static void handle_2op( Parser *p, char *name )
+{
+    Operand *dst = &p->operand[0];
+    Operand *src = &p->operand[1];
+    int encoded = p->opcodeInfo->opcode;
+
+    if( p->debug ) {
+        char trace[MAX_TMP_LEN];
+
+        trace[0] = '\0';
+        strcat( trace, "ENCODING " );
+        strcat( trace, name );
+        print( trace );
+        print_operand( dst );
+        print_operand( src );
+    }
+
+    /* destination operand */
+    if( dst->type == OPERAND_TYPE_IDENT ) {
+        if( is_register( dst->data.ident ) ) {
+            encoded |= register_dst_mask( dst->data.ident );
+        }
+    } else if( dst->type == OPERAND_TYPE_INT_CONST ) {
+        print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" );
+    }
+
+    /* source operand */
+    if( src->type == OPERAND_TYPE_IDENT ) {
+        if( is_register( src->data.ident ) ) {
+            encoded |= register_src_mask( src->data.ident );
+        }
+    } else if( src->type == OPERAND_TYPE_INT_CONST ) {
+        encoded |= OPCODE_SRC_CONST;
+        p->code[p->LC+1] = src->data.int_const;
+    }
+
+    p->code[p->LC] = encoded;
+    p->last_opcode = encoded;
+}
+
+/* Encode MOV through the shared two-operand encoder. */
+static void output_code_mov( Parser *p )
+{
+    char *mnemonic = "MOV";
+
+    handle_2op( p, mnemonic );
+}
+
+/* Encode OR through the shared two-operand encoder. */
+static void output_code_or( Parser *p )
+{
+    char *mnemonic = "OR";
+
+    handle_2op( p, mnemonic );
+}
+
+/* Encode AND through the shared two-operand encoder. */
+static void output_code_and( Parser *p )
+{
+    char *mnemonic = "AND";
+
+    handle_2op( p, mnemonic );
+}
+
+/* Encode ADD through the shared two-operand encoder. */
+static void output_code_add( Parser *p )
+{
+    char *mnemonic = "ADD";
+
+    handle_2op( p, mnemonic );
+}
+
+/* Encode CMP through the shared two-operand encoder. */
+static void output_code_cmp( Parser *p )
+{
+    char *mnemonic = "CMP";
+
+    handle_2op( p, mnemonic );
+}
+
+/* Encode SUB through the shared two-operand encoder. */
+static void output_code_sub( Parser *p )
+{
+    char *mnemonic = "SUB";
+
+    handle_2op( p, mnemonic );
+}
+
+/*
+ * Emit the instruction described by p->opcodeInfo and p->operand[] at the
+ * current location counter.
+ *
+ * The raw opcode is stored first; the instruction-specific encoder chosen
+ * by the opcode group then refines it and adds an operand byte where
+ * needed. Opcodes without operands need nothing beyond the raw byte.
+ *
+ * The code buffer is allocated with DEFAULT_MEMORY_SIZE in parser_init()
+ * and the encoders write at most two bytes (p->LC and p->LC+1), so an
+ * instruction is only emitted while both positions lie inside the buffer.
+ * Otherwise an error is printed and nothing is written; p->last_opcode is
+ * still updated so the caller's size computation refers to this
+ * instruction. (Assumes allocate() takes a size in bytes -- to be
+ * confirmed against its definition.)
+ */
+static void output_code( Parser *p )
+{
+    OpcodeInfo *info = p->opcodeInfo;
+
+    if( p->LC < 0 || p->LC + 1 >= DEFAULT_MEMORY_SIZE ) {
+        print( "ERROR: CODE DOES NOT FIT INTO MEMORY" );
+        p->last_opcode = info->opcode;
+        return;
+    }
+
+    p->code[p->LC] = info->opcode;
+
+    /* the order of the tests is significant, the groups are bit masks */
+    if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_MOV_MASK ) {
+        output_code_mov( p );
+    } else if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_OR ) {
+        output_code_or( p );
+    } else if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_AND ) {
+        output_code_and( p );
+    } else if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_CMP ) {
+        output_code_cmp( p );
+    } else if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_SUB ) {
+        output_code_sub( p );
+    } else if( ( info->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_ADD ) {
+        output_code_add( p );
+    } else if( ( info->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) {
+        output_code_jmp( p );
+    } else if( ( info->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_NOT ) {
+        output_code_not( p );
+    } else if( ( info->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_STACK ) {
+        output_code_stack( p );
+    } else if( ( ~info->opcode & OPCODE_ZERO_OPS_MASK ) == OPCODE_ZERO_OPS_MASK ) {
+        /* no operands: the raw opcode stored above is the whole encoding */
+    } else {
+        print( "ERROR: ILLEGAL OUTPUT CODE" );
+    }
+}
+
+/*
+ * Return the size (1 or 2) of the instruction whose opcode was last
+ * recorded in p->last_opcode.
+ *
+ *  - two-operand group: 2 when the source is a constant or memory,
+ *    otherwise 1;
+ *  - one-operand group: 2 for jumps, otherwise 1;
+ *  - everything else: 1.
+ *
+ * The "CODESIZE n" trace is now printed only when debugging is enabled,
+ * in line with every other diagnostic trace of the parser; it used to be
+ * printed unconditionally for each instruction in both passes.
+ */
+static int compute_codesize( Parser *p )
+{
+    int size = 1;
+
+    if( p->last_opcode & OPCODE_GROUP_TWO_OPERANDS ) {
+        if( ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_CONST ||
+            ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_MEMORY ) {
+            size = 2;
+        }
+    } else if( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) {
+        if( ( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) {
+            size = 2;
+        }
+    }
+
+    if( p->debug ) {
+        if( size == 2 ) {
+            print( "CODESIZE 2" );
+        } else {
+            print( "CODESIZE 1" );
+        }
+    }
+
+    return size;
+}
+
+/*
+ * Bookkeeping after one operand has been stored in p->operand[]: count
+ * it and return to the "label or opcode" state once the current opcode
+ * has all its operands. Without a current opcode (operand seen after an
+ * error) the state is reset immediately instead of dereferencing a null
+ * p->opcodeInfo.
+ */
+static void pass_operand_stored( Parser *p )
+{
+    p->nof_operands++;
+    if( !p->opcodeInfo || p->nof_operands >= p->opcodeInfo->operands ) {
+        p->state = PARSE_LABEL_OR_OPCODE;
+        p->nof_operands = 0;
+    }
+}
+
+/*
+ * End of a source line: emit the pending instruction, if any, advance
+ * the location counter by its size and reset the per-line parser state.
+ */
+static void pass_end_of_line( Parser *p )
+{
+    if( p->opcodeInfo ) {
+        output_code( p );
+        p->LC += compute_codesize( p );
+    }
+    p->state = PARSE_LABEL_OR_OPCODE;
+    p->opcodeInfo = 0;
+    p->nof_operands = 0;
+}
+
+/*
+ * Run one assembler pass over all symbols delivered by the scanner.
+ *
+ * `pass` is stored in p->pass; labels are only recorded in pass 1, the
+ * encoders consult p->pass themselves. Symbols are handled as follows:
+ *  - comment: rest of the line is skipped;
+ *  - label:   remembered (pass 1), an opcode is expected next;
+ *  - ident:   an opcode mnemonic or an identifier operand, depending on
+ *             the parser state;
+ *  - number:  an integer operand (an error in opcode position);
+ *  - newline: the collected instruction is emitted.
+ *
+ * Changes against the previous behaviour:
+ *  - a number in opcode position switches to PARSE_OPERAND while no
+ *    opcode is known; the following operand used to dereference the null
+ *    p->opcodeInfo, now the state is simply reset;
+ *  - an instruction still pending when the scanner reports end of file
+ *    (input without a final newline) is emitted instead of being dropped
+ *    and leaking its state into the next pass.
+ */
+static void pass( Parser *p, int pass )
+{
+    Symbol s;
+
+    p->pass = pass;
+
+    if( p->debug ) {
+        char buf[MAX_TMP_LEN];
+        char buf2[MAX_TMP_LEN];
+        inttohex( pass, buf2 );
+        *buf = '\0';
+        strcat( buf, "PASS " );
+        strcat( buf, buf2 );
+        print( buf );
+    }
+
+    for( s = scanner_scan( p->s ); s.sym != S_eof; s = scanner_scan( p->s ) ) {
+        if( s.sym == S_comment ) {
+            scanner_skip_line( p->s );
+            if( p->debug ) {
+                print( "PARSER(COMMENT)" );
+            }
+            p->state = PARSE_LABEL_OR_OPCODE;
+        } else if( s.sym == S_label ) {
+            if( p->debug ) {
+                char buf[MAX_TMP_LEN];
+                *buf = '\0';
+                strcat( buf, "LABEL(" );
+                strcat( buf, s.data.s );
+                strcat( buf, ")" );
+                print( buf );
+            }
+            if( pass == 1 ) {
+                remember_label( p, s.data.s );
+            }
+            p->state = PARSE_OPCODE;
+        } else if( s.sym == S_ident ) {
+            char *ident = s.data.s;
+            char buf[MAX_TMP_LEN];
+
+            switch( p->state ) {
+                case PARSE_LABEL_OR_OPCODE:
+                case PARSE_OPCODE:
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPCODE(" );
+                        strcat( buf, ident );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->opcodeInfo = handle_opcode( p, ident );
+                    if( p->opcodeInfo ) {
+                        if( p->opcodeInfo->operands > 0 ) {
+                            p->state = PARSE_OPERAND;
+                            p->nof_operands = 0;
+                        }
+                    }
+                    break;
+
+                case PARSE_OPERAND:
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPERAND(" );
+                        strcat( buf, ident );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->operand[p->nof_operands].type = OPERAND_TYPE_IDENT;
+                    strcpy( p->operand[p->nof_operands].data.ident, ident );
+                    pass_operand_stored( p );
+                    break;
+
+                default:
+                    print( "ERROR: ILLEGAL PARSE STATE" );
+                    halt( );
+            }
+        } else if( s.sym == S_number ) {
+            char buf[MAX_TMP_LEN];
+            char buf2[MAX_TMP_LEN];
+            int int_const = s.data.n;
+
+            switch( p->state ) {
+                case PARSE_LABEL_OR_OPCODE:
+                case PARSE_OPCODE:
+                    print( "ERROR: EXPECTED IDENTIFIER FOR LABEL OR OPCODE" );
+                    p->state = PARSE_OPERAND;
+                    p->nof_operands = 0;
+                    break;
+
+                case PARSE_OPERAND:
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPERAND(" );
+                        inttohex( int_const, buf2 );
+                        strcat( buf, buf2 );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->operand[p->nof_operands].type = OPERAND_TYPE_INT_CONST;
+                    p->operand[p->nof_operands].data.int_const = int_const;
+                    pass_operand_stored( p );
+                    break;
+
+                default:
+                    print( "ERROR: ILLEGAL PARSE STATE" );
+                    halt( );
+            }
+        } else if( s.sym == S_newline ) {
+            pass_end_of_line( p );
+        }
+    }
+
+    /* input may end without a newline: do not lose the last instruction */
+    pass_end_of_line( p );
+}
+
+/*
+ * Print the heading "LABELS:" followed by one line per entry of the
+ * label table in the form " <name> = <value in hex>".
+ */
+static void print_labels( Parser *p )
+{
+    intHashIterator it;
+    intHashEntry *cur;
+
+    print( "LABELS:" );
+
+    for( cur = inthash_getfirst( &p->labels, &it );
+         cur;
+         cur = inthash_getnext( &it ) ) {
+        char line[MAX_TMP_LEN];
+        char hex[MAX_TMP_LEN];
+
+        line[0] = '\0';
+        strcat( line, " " );
+        strcat( line, cur->key );
+        strcat( line, " = " );
+        inttohex( cur->value, hex );
+        strcat( line, hex );
+        print( line );
+    }
+}
+
+/*
+ * Assemble the scanner's input in two passes.
+ *
+ * Pass 1 runs first (this is where labels are recorded); with debugging
+ * enabled the label table is printed afterwards. The scanner is then
+ * reset, the location counter is set back to 0 and pass 2 runs. The
+ * opcode table is initialised before and torn down after the passes.
+ */
+void parser_parse( Parser *p )
+{
+    enum { FIRST_PASS = 1, SECOND_PASS = 2 };
+
+    opcode_table_init( );
+
+    /* first pass */
+    pass( p, FIRST_PASS );
+    if( p->debug ) {
+        print_labels( p );
+    }
+
+    /* second pass: start over from the beginning of the input */
+    scanner_reset( p->s );
+    p->LC = 0;
+    pass( p, SECOND_PASS );
+
+    opcode_table_done( );
+}