diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2017-01-01 19:31:12 +0100 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2017-01-01 19:31:12 +0100 |
commit | 69fe7b182a1eedfb75c611f7dd35fa60200426f4 (patch) | |
tree | 329a0c6cc9b06c23d8782ece09f0f7dfa9b16b13 /miniasm | |
download | compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.gz compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.bz2 |
initial checkin
Diffstat (limited to 'miniasm')
-rw-r--r-- | miniasm/README | 35 | ||||
-rw-r--r-- | miniasm/const.h | 10 | ||||
-rw-r--r-- | miniasm/main.c | 44 | ||||
-rw-r--r-- | miniasm/optable.c | 56 | ||||
-rw-r--r-- | miniasm/optable.h | 22 | ||||
-rw-r--r-- | miniasm/parse.c | 543 | ||||
-rw-r--r-- | miniasm/parse.h | 39 | ||||
-rw-r--r-- | miniasm/scan.c | 199 | ||||
-rw-r--r-- | miniasm/scan.h | 37 | ||||
-rw-r--r-- | miniasm/test1.asm | 6 | ||||
-rw-r--r-- | miniasm/test2.asm | 8 | ||||
-rw-r--r-- | miniasm/test3.asm | 13 | ||||
-rw-r--r-- | miniasm/test4.asm | 17 | ||||
-rw-r--r-- | miniasm/test5.asm | 19 | ||||
-rw-r--r-- | miniasm/test6.asm | 12 | ||||
-rw-r--r-- | miniasm/test7.asm | 12 | ||||
-rw-r--r-- | miniasm/test8.asm | 25 |
17 files changed, 1097 insertions, 0 deletions
diff --git a/miniasm/README b/miniasm/README new file mode 100644 index 0000000..a7098f2 --- /dev/null +++ b/miniasm/README @@ -0,0 +1,35 @@ +Design +------ + +- one-pass: patching up addresses later or two-pass (scan location of all symbols first, + have them ready for the second pass), two-pass assemblers are easier to write and + relocation of addresses is possible in a deferred way, for instance at load time +- intermediate listing-format +- LC: location counter +- extreme approach: encode the specific assembly language (e.g. register names) into + the grammar, another extreme approach: hand-coded parser to parse the lines. One + line == one instruction is something we usually don't like in a grammar ('\n' having + meaning in the grammar), on the other hand keywords in assembly ('INSTR MOV AX, B END') + look a little bit clumsy. Definitely when we have complex address calculations + we can benefit from a real parser (generated or not). +- one vs. two-pass +- labels and local labels (FUNC: and .loop and FUNC.loop) +- LC-relative addressing $+5 + +Building +-------- + +gcc -I../minilib -I../miniemu -g -O0 -m32 -march=i386 -ffreestanding -Werror -Wall -Wno-return-type -pedantic -std=c89 -o miniasm *.c ../minilib/*.c +clang -I../minilib -I../miniemu -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o miniasm *.c ../minilib/*.c + +Usage +----- + +./miniasm test1.asm ../miniemu/test1.bin +./miniasm test2.asm ../miniemu/test2.bin +./miniasm test3.asm ../miniemu/test3.bin +./miniasm test4.asm ../miniemu/test4.bin +./miniasm test5.asm ../miniemu/test5.bin +./miniasm test6.asm ../miniemu/test6.bin +./miniasm test7.asm ../miniemu/test7.bin +./miniasm test8.asm ../miniemu/test8.bin diff --git a/miniasm/const.h b/miniasm/const.h new file mode 100644 index 0000000..6ccc38d --- /dev/null +++ b/miniasm/const.h @@ -0,0 +1,10 @@ +#pragma once + +enum { + MAX_LABEL_LEN = 12, + MAX_OP_LEN = 10, + MAX_IDENT_LEN = 12, + MAX_TMP_LEN = 80, + INIT_NOF_LABELS = 
4, + DEFAULT_MEMORY_SIZE = 256 +}; diff --git a/miniasm/main.c b/miniasm/main.c new file mode 100644 index 0000000..53b01b9 --- /dev/null +++ b/miniasm/main.c @@ -0,0 +1,44 @@ +#include "stdlib.h" +#include "const.h" +#include "io.h" +#include "arena.h" +#include "scan.h" +#include "hash.h" +#include "opcodes.h" +#include "optable.h" +#include "parse.h" + +int main( int argc, char *argv[] ) +{ + char *src; + Scanner scanner; + Parser parser; + + if( argc != 3 ) { + print( "USAGE: miniasm <module.asm> <module.bin>" ); + return 1; + } + + src = readallfile( argv[1] ); + if( !src ) { + print( "UNABLE TO READ SOURCE FILE" ); + return 1; + } + + scanner_init( &scanner, src ); + parser_init( &parser, &scanner ); + + scanner_debug( &scanner, 0 ); + parser_debug( &parser, 1 ); + + parser_parse( &parser ); + + writefile( argv[2], parser.code, DEFAULT_MEMORY_SIZE ); + + parser_done( &parser ); + scanner_done( &scanner ); + + deallocate( (void **)&src ); + + exit( 0 ); +} diff --git a/miniasm/optable.c b/miniasm/optable.c new file mode 100644 index 0000000..c569b74 --- /dev/null +++ b/miniasm/optable.c @@ -0,0 +1,56 @@ +#include "opcodes.h" +#include "hash.h" +#include "optable.h" + +OpcodeInfo opcodeTable[NOF_OPCODES] = { + { "mov", OPCODE_MOV_MASK, 2 }, + { "or", OPCODE_OR, 2 }, + { "and", OPCODE_AND, 2 }, + { "sub", OPCODE_SUB, 2 }, + { "add", OPCODE_ADD, 2 }, + { "cmp", OPCODE_CMP, 2 }, + { "jmp", OPCODE_JMP | OPCODE_JMP_JMP, 1 }, + { "je", OPCODE_JMP | OPCODE_JMP_JE, 1 }, + { "jne", OPCODE_JMP | OPCODE_JMP_JNE, 1 }, + { "ja", OPCODE_JMP | OPCODE_JMP_JA, 1 }, + { "jae", OPCODE_JMP | OPCODE_JMP_JAE, 1 }, + { "jb", OPCODE_JMP | OPCODE_JMP_JB, 1 }, + { "jbe", OPCODE_JMP | OPCODE_JMP_JBE, 1 }, + { "jsr", OPCODE_JMP | OPCODE_JMP_JSR, 1 }, + { "not", OPCODE_NOT, 1 }, + { "push", OPCODE_STACK | OPCODE_STACK_PUSH, 1 }, + { "pop", OPCODE_STACK | OPCODE_STACK_POP, 1 }, + { "nop", OPCODE_NOP, 0 }, + { "hlt", OPCODE_HLT, 0 }, + { "ret", OPCODE_RET, 0 } +}; + +intHashTable opcode_ht; 
+ +void opcode_table_init( ) +{ + int i; + + inthash_init( &opcode_ht, NOF_OPCODES ); + + for( i = 0; i < NOF_OPCODES; i++ ) { + inthash_set( &opcode_ht, opcodeTable[i].mnemonic, i ); + } +} + +void opcode_table_done( ) +{ + inthash_done( &opcode_ht ); +} + +OpcodeInfo *lookup_opcode( char *mnemonic ) +{ + int idx; + + idx = inthash_get( &opcode_ht, mnemonic ); + if( idx >= 0 ) { + return &opcodeTable[idx]; + } + + return 0; +} diff --git a/miniasm/optable.h b/miniasm/optable.h new file mode 100644 index 0000000..bd3c5c2 --- /dev/null +++ b/miniasm/optable.h @@ -0,0 +1,22 @@ +#pragma once + +enum { + NOF_OPCODES = 20, + MAX_MNEMONIC_SIZE = 5, + MAX_OPERANDS = 2 +}; + +typedef struct OpcodeInfo +{ + char mnemonic[MAX_MNEMONIC_SIZE]; + Opcode opcode; + int operands; +} OpcodeInfo; + +extern OpcodeInfo opcodeTable[NOF_OPCODES]; + +extern intHashTable opcode_ht; + +extern void opcode_table_init( ); +extern void opcode_table_done( ); +extern OpcodeInfo *lookup_opcode( char *mnemonic ); diff --git a/miniasm/parse.c b/miniasm/parse.c new file mode 100644 index 0000000..2a1c423 --- /dev/null +++ b/miniasm/parse.c @@ -0,0 +1,543 @@ +#include "const.h" +#include "hash.h" +#include "opcodes.h" +#include "optable.h" +#include "scan.h" +#include "parse.h" +#include "string.h" +#include "io.h" +#include "utils.h" +#include "minilib.h" +#include "arena.h" + +void parser_init( Parser *p, Scanner *s ) +{ + p->LC = 0; + p->s = s; + inthash_init( &p->labels, INIT_NOF_LABELS ); + p->state = PARSE_LABEL_OR_OPCODE; + p->opcodeInfo = 0; + p->nof_operands = 0; + p->code = (char *)allocate( DEFAULT_MEMORY_SIZE ); + p->last_opcode = 0; +} + +void parser_done( Parser *p ) +{ + deallocate( (void *)&p->code ); + inthash_done( &p->labels ); +} + +void parser_debug( Parser *p, int enable ) +{ + p->debug = enable; + + if( p->debug ) { + print( "PARSER DEBUGGING ENABLED" ); + } +} + +static OpcodeInfo *handle_opcode( Parser *p, char *mnemonic ) +{ + OpcodeInfo *opcode; + + opcode = lookup_opcode( 
mnemonic ); + if( !opcode ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: ILLEGAL OPCODE '" ); + strcat( buf, mnemonic ); + strcat( buf, "'" ); + print( buf ); + return 0; + } + + return opcode; +} + +static void remember_label( Parser *p, char *label ) +{ + int LC; + + LC = inthash_get( &p->labels, label ); + + if( LC == -1 ) { + inthash_set( &p->labels, strdup( label ), p->LC ); + } else { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: DUPLICATE LABEL '" ); + strcat( buf, label ); + strcat( buf, "'" ); + print( buf ); + } +} + +static int lookup_label( Parser *p, char *label ) +{ + int LC; + + LC = inthash_get( &p->labels, label ); + + return LC; +} + +static void print_operand( Operand *o ) +{ + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + *buf = '\0'; + + switch( o->type ) { + case OPERAND_TYPE_IDENT: + strcat( buf, "OPERAND_IDENT(" ); + strcat( buf, o->data.ident ); + strcat( buf, ")" ); + break; + + case OPERAND_TYPE_INT_CONST: + strcat( buf, "OPERAND_INT_CONST(" ); + inttohex( o->data.int_const, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + break; + } + + print( buf ); +} + +static int is_register( char *s ) +{ + if( ( strcmp( s, "ax" ) == 0 ) || + ( strcmp( s, "bx" ) == 0 ) || + ( strcmp( s, "cx" ) == 0 ) || + ( strcmp( s, "dx" ) == 0 ) ) { + return 1; + } + return 0; +} + +static int register_src_mask( char *s ) +{ + if( strcmp( s, "ax" ) == 0 ) { + return OPCODE_SRC_AX_REG; + } else if( strcmp( s, "bx" ) == 0 ) { + return OPCODE_SRC_BX_REG; + } else if( strcmp( s, "cx" ) == 0 ) { + return OPCODE_SRC_CX_REG; + } else if( strcmp( s, "dx" ) == 0 ) { + return OPCODE_SRC_DX_REG; + } +} + +static int register_dst_mask( char *s ) +{ + if( strcmp( s, "ax" ) == 0 ) { + return OPCODE_DST_AX_REG; + } else if( strcmp( s, "bx" ) == 0 ) { + return OPCODE_DST_BX_REG; + } else if( strcmp( s, "cx" ) == 0 ) { + return OPCODE_DST_CX_REG; + } else if( strcmp( s, "dx" ) == 0 ) { + return OPCODE_DST_DX_REG; + } +} + +static 
void output_code_jmp( Parser *p ) +{ + int addr; + int opcode; + + if( p->debug ) { + print( "ENCODING JMP" ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( p->pass == 2 ) { + addr = lookup_label( p, p->operand[0].data.ident ); + if( addr == -1 ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ERROR: ILLEGAL LABEL IN JUMP '" ); + strcat( buf, p->operand[0].data.ident ); + strcat( buf, "'" ); + print( buf ); + return; + } + p->code[p->LC+1] = addr; + } + break; + + case OPERAND_TYPE_INT_CONST: + /* assuming an absolute constant address */ + addr = p->operand[0].data.int_const; + p->code[p->LC+1] = addr; + break; + } + + p->last_opcode = opcode; +} + +static void output_code_not( Parser *p ) +{ + int opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING NOT" ); + print( buf ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_src_mask( p->operand[0].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" ); + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void output_code_stack( Parser *p ) +{ + int opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING NOT" ); + print( buf ); + print_operand( &p->operand[0] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_src_mask( p->operand[0].data.ident ); + } + break; + + default: + print( "ERROR: PUSH AND POP EXCPECT A REGISTER AS OPERAND" ); + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void handle_2op( Parser *p, char *name ) +{ + int 
opcode; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "ENCODING " ); + strcat( buf, name ); + print( buf ); + print_operand( &p->operand[0] ); + print_operand( &p->operand[1] ); + } + + opcode = p->opcodeInfo->opcode; + + switch( p->operand[0].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[0].data.ident ) ) { + opcode |= register_dst_mask( p->operand[0].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" ); + break; + } + + switch( p->operand[1].type ) { + case OPERAND_TYPE_IDENT: + if( is_register( p->operand[1].data.ident ) ) { + opcode |= register_src_mask( p->operand[1].data.ident ); + } + break; + + case OPERAND_TYPE_INT_CONST: + opcode |= OPCODE_SRC_CONST; + p->code[p->LC+1] = p->operand[1].data.int_const; + break; + } + + p->code[p->LC] = opcode; + + p->last_opcode = opcode; +} + +static void output_code_mov( Parser *p ) +{ + handle_2op( p, "MOV" ); +} + +static void output_code_or( Parser *p ) +{ + handle_2op( p, "OR" ); +} + +static void output_code_and( Parser *p ) +{ + handle_2op( p, "AND" ); +} + +static void output_code_add( Parser *p ) +{ + handle_2op( p, "ADD" ); +} + +static void output_code_cmp( Parser *p ) +{ + handle_2op( p, "CMP" ); +} + +static void output_code_sub( Parser *p ) +{ + handle_2op( p, "SUB" ); +} + +static void output_code( Parser *p ) +{ + p->code[p->LC] = p->opcodeInfo->opcode; + + if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_MOV_MASK ) { + output_code_mov( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_OR ) { + output_code_or( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_AND ) { + output_code_and( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_CMP ) { + output_code_cmp( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_SUB ) { + output_code_sub( p ); + } else 
if( ( p->opcodeInfo->opcode & OPCODE_GROUP_TWO_OPERANDS ) == OPCODE_ADD ) { + output_code_add( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) { + output_code_jmp( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_NOT ) { + output_code_not( p ); + } else if( ( p->opcodeInfo->opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_STACK ) { + output_code_stack( p ); + } else if( ( ~p->opcodeInfo->opcode & OPCODE_ZERO_OPS_MASK ) == OPCODE_ZERO_OPS_MASK ) { + /* done above */ + } else { + print( "ERROR: ILLEGAL OUTPUT CODE" ); + } +} + +static int compute_codesize( Parser *p ) +{ + if( p->last_opcode & OPCODE_GROUP_TWO_OPERANDS ) { + if( ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_CONST || + ( p->last_opcode & OPCODE_SRC_MASK ) == OPCODE_SRC_MEMORY ) { + print( "CODESIZE 2" ); + return 2; + } else { + print( "CODESIZE 1" ); + return 1; + } + } else if( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) { + if( ( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) { + print( "CODESIZE 2" ); + return 2; + } else { + print( "CODESIZE 1" ); + return 1; + } + } else { + print( "CODESIZE 1" ); + return 1; + } +} + +static void pass( Parser *p, int pass ) +{ + Symbol s; + + p->pass = pass; + + if( p->debug ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + inttohex( pass, buf2 ); + *buf = '\0'; + strcat( buf, "PASS " ); + strcat( buf, buf2 ); + print( buf ); + } + + s = scanner_scan( p->s ); + + while( s.sym != S_eof ) { + if( s.sym == S_comment ) { + scanner_skip_line( p->s ); + if( p->debug ) { + print( "PARSER(COMMENT)" ); + } + p->state = PARSE_LABEL_OR_OPCODE; + } else if( s.sym == S_label ) { + if( p->debug ) { + char buf[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, "LABEL(" ); + strcat( buf, s.data.s ); + strcat( buf, ")" ); + print( buf ); + } + if( pass == 1 ) { + remember_label( p, s.data.s ); + } + p->state = PARSE_OPCODE; + } else if( s.sym == S_ident ) { + char *ident = s.data.s; + char 
buf[MAX_TMP_LEN]; + + switch( p->state ) { + case PARSE_LABEL_OR_OPCODE: + case PARSE_OPCODE: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPCODE(" ); + strcat( buf, ident ); + strcat( buf, ")" ); + print( buf ); + } + p->opcodeInfo = handle_opcode( p, ident ); + if( p->opcodeInfo ) { + if( p->opcodeInfo->operands > 0 ) { + p->state = PARSE_OPERAND; + p->nof_operands = 0; + } + } + break; + + case PARSE_OPERAND: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPERAND(" ); + strcat( buf, ident ); + strcat( buf, ")" ); + print( buf ); + } + p->operand[p->nof_operands].type = OPERAND_TYPE_IDENT; + strcpy( p->operand[p->nof_operands].data.ident, ident ); + p->nof_operands++; + if( p->nof_operands >= p->opcodeInfo->operands ) { + p->state = PARSE_LABEL_OR_OPCODE; + p->nof_operands = 0; + } + break; + + default: + print( "ERROR: ILLEGAL PARSE STATE" ); + halt( ); + } + } else if( s.sym == S_number ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + int int_const = s.data.n; + + switch( p->state ) { + case PARSE_LABEL_OR_OPCODE: + case PARSE_OPCODE: + print( "ERROR: EXPECTED IDENTIFIER FOR LABEL OR OPCODE" ); + p->state = PARSE_OPERAND; + p->nof_operands = 0; + break; + + case PARSE_OPERAND: + if( p->debug ) { + *buf = '\0'; + strcat( buf, "OPERAND(" ); + inttohex( int_const, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + print( buf ); + } + p->operand[p->nof_operands].type = OPERAND_TYPE_INT_CONST; + p->operand[p->nof_operands].data.int_const = int_const; + p->nof_operands++; + if( p->nof_operands >= p->opcodeInfo->operands ) { + p->state = PARSE_LABEL_OR_OPCODE; + p->nof_operands = 0; + } + break; + + default: + print( "ERROR: ILLEGAL PARSE STATE" ); + halt( ); + } + } else if( s.sym == S_newline ) { + if( p->opcodeInfo ) { + output_code( p ); + p->LC += compute_codesize( p ); + } + p->state = PARSE_LABEL_OR_OPCODE; + p->opcodeInfo = 0; + p->nof_operands = 0; + } + s = scanner_scan( p->s ); + } +} + +static void print_labels( Parser *p ) +{ + 
intHashIterator it; + intHashEntry *entry; + + print( "LABELS:" ); + entry = inthash_getfirst( &p->labels, &it ); + while( entry ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + *buf = '\0'; + strcat( buf, " " ); + strcat( buf, entry->key ); + strcat( buf, " = " ); + inttohex( entry->value, buf2 ); + strcat( buf, buf2 ); + print( buf ); + entry = inthash_getnext( &it ); + } +} + +void parser_parse( Parser *p ) +{ + opcode_table_init( ); + + pass( p, 1 ); + + if( p->debug ) { + print_labels( p ); + } + + scanner_reset( p->s ); + p->LC = 0; + pass( p, 2 ); + + opcode_table_done( ); +} diff --git a/miniasm/parse.h b/miniasm/parse.h new file mode 100644 index 0000000..4187f29 --- /dev/null +++ b/miniasm/parse.h @@ -0,0 +1,39 @@ +#pragma once + +typedef enum ParseState { + PARSE_LABEL_OR_OPCODE, + PARSE_OPCODE, + PARSE_OPERAND +} ParseState; + +typedef enum OperandType { + OPERAND_TYPE_IDENT, + OPERAND_TYPE_INT_CONST +} OperandType; + +typedef struct Operand { + OperandType type; + union { + char ident[MAX_IDENT_LEN]; + int int_const; + } data; +} Operand; + +typedef struct Parser { + Scanner *s; + int debug; + intHashTable labels; + int LC; + ParseState state; + struct OpcodeInfo *opcodeInfo; + int nof_operands; + Operand operand[MAX_OPERANDS]; + char *code; + int last_opcode; + int pass; +} Parser; + +extern void parser_init( Parser *p, Scanner *s ); +extern void parser_done( Parser *p ); +extern void parser_debug( Parser *p, int enable ); +extern void parser_parse( Parser *p ); diff --git a/miniasm/scan.c b/miniasm/scan.c new file mode 100644 index 0000000..ab8dffb --- /dev/null +++ b/miniasm/scan.c @@ -0,0 +1,199 @@ +#include "const.h" +#include "scan.h" +#include "ctype.h" +#include "string.h" +#include "utils.h" +#include "io.h" + +void scanner_init( Scanner *s, char *src ) +{ + s->src = src; + scanner_reset( s ); +} + +void scanner_reset( Scanner *s ) +{ + s->peek = ' '; + s->row = 1; + s->col = 1; + s->pos = s->src; +} + +void scanner_done( Scanner *s ) 
+{ +} + +static char get_char( Scanner *s ) +{ + char c; + + c = *s->pos; + s->pos++; + + return c; +} + +static void skip_whitespace( Scanner *s ) +{ + for( ; ; s->peek = get_char( s ) ) { + s->col++; + if( s->peek == ' ' || s->peek == '\t' ) { + continue; + } else if( s->peek == '\n' ) { + s->row++; + s->col = 1; + break; + } else if( s->peek == '\0' ) { + break; + } else { + break; + } + } +} + +void scanner_skip_line( Scanner *s ) +{ + while( s->peek != '\0' && s->peek != '\n' ) { + s->col++; + s->peek = get_char( s ); + } + + if( s->peek == '\n' ) { + s->col++; + } +} + +Symbol get_int( Scanner *s ) +{ + Symbol sym; + + sym.sym = S_number; + sym.data.n = 0; + + do { + sym.data.n = sym.data.n * 10 + ( s->peek - '0' ); + s->peek = get_char( s ); + } while( isdigit( s->peek ) && ( s->peek != '\0' ) ); + + return sym; +} + +Symbol get_ident_or_label( Scanner *s ) +{ + Symbol newSym; + char *p; + + newSym.sym = S_ident; + newSym.data.s[0] = '\0'; + p = newSym.data.s; + + do { + *p++ = s->peek; + s->peek = get_char( s ); + } while( ( isalnum( s->peek ) || s->peek == '_' ) && !isspace( s->peek ) && ( s->peek != '\0' ) && ( s->peek != ':' ) && ( p - newSym.data.s < MAX_IDENT_LEN ) ); + + if( s->peek == ':' ) { + newSym.sym = S_label; + s->peek = get_char( s ); + } + + *p = '\0'; + + return newSym; +} + +Symbol scanner_scan( Scanner *s ) +{ + Symbol sym; + + if( s->peek == '\0' ) { + sym.sym = S_eof; + if( s->debug ) { + print( "SCANNER(EOF)" ); + } + return sym; + } + + skip_whitespace( s ); + + if( s->peek == '\0' ) { + sym.sym = S_eof; + if( s->debug ) { + print( "SCANNER(EOF)" ); + } + return sym; + } + + if( isdigit( s->peek ) ) { + sym = get_int( s ); + if( s->debug ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + buf[0] = '\0'; + strcat( buf, "SCANNER(NUMBER," ); + inttohex( sym.data.n, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + print( buf ); + } + return sym; + } else if( isalpha( s->peek ) ) { + sym = get_ident_or_label( s ); + if( s->debug 
) { + char buf[MAX_TMP_LEN]; + buf[0] = '\0'; + if( sym.sym == S_ident ) { + strcat( buf, "SCANNER(IDENT," ); + } else if( sym.sym == S_label ) { + strcat( buf, "SCANNER(LABEL," ); + } else { + strcat( buf, "SCANNER(<unknown>," ); + } + strcat( buf, sym.data.s ); + strcat( buf, ")" ); + print( buf ); + } + return sym; + } else if( s->peek == '\n' ) { + sym.sym = S_newline; + if( s->debug ) { + print( "SCANNER(NEWLINE)" ); + } + s->peek = ' '; + return sym; + } else if( s->peek == ';' ) { + sym.sym = S_comment; + if( s->debug ) { + print( "SCANNER(COMMENT)" ); + } + s->peek = ' '; + return sym; + } else { + sym.sym = S_token; + sym.tag = s->peek; + s->peek = ' '; + if( s->debug ) { + char buf[MAX_TMP_LEN]; + char buf2[MAX_TMP_LEN]; + buf[0] = '\0'; + strcat( buf, "SCANNER(TOKEN," ); + inttohex( sym.tag, buf2 ); + strcat( buf, buf2 ); + strcat( buf, ")" ); + print( buf ); + } + return sym; + } + + return sym; +} + +void scanner_debug( Scanner *s, int enable ) +{ + s->debug = enable; + + if( s->debug ) { + print( "SCANNER DEBUGGING ENABLED" ); + } +} + diff --git a/miniasm/scan.h b/miniasm/scan.h new file mode 100644 index 0000000..c4d3b9d --- /dev/null +++ b/miniasm/scan.h @@ -0,0 +1,37 @@ +#pragma once + +typedef enum Sym { + S_undef, + S_number, + S_ident, + S_token, + S_newline, + S_label, + S_comment, + S_eof +} Sym; + +typedef struct Symbol { + Sym sym; + union { + char s[MAX_IDENT_LEN]; + int n; + } data; + int tag; +} Symbol; + +typedef struct Scanner { + int peek; + int row; + int col; + char *src; + char *pos; + int debug; +} Scanner; + +extern void scanner_init( Scanner *s, char *src ); +extern void scanner_reset( Scanner *s ); +extern void scanner_done( Scanner *s ); +extern Symbol scanner_scan( Scanner *s ); +extern void scanner_debug( Scanner *s, int enable ); +extern void scanner_skip_line( Scanner *s ); diff --git a/miniasm/test1.asm b/miniasm/test1.asm new file mode 100644 index 0000000..6d5cc90 --- /dev/null +++ b/miniasm/test1.asm @@ -0,0 +1,6 @@ +; 
test1 - halt + +begin: + hlt ; halt the processor +end: + diff --git a/miniasm/test2.asm b/miniasm/test2.asm new file mode 100644 index 0000000..6d09afc --- /dev/null +++ b/miniasm/test2.asm @@ -0,0 +1,8 @@ +; test2 - nops + +begin: + nop + nop + nop +end: hlt + diff --git a/miniasm/test3.asm b/miniasm/test3.asm new file mode 100644 index 0000000..97144a3 --- /dev/null +++ b/miniasm/test3.asm @@ -0,0 +1,13 @@ +; test3 - unconditional jumps + +begin: + jmp label1 + +label2: + jmp 6 + +label1: + jmp label2 + +; this is at absolute position 6 + hlt diff --git a/miniasm/test4.asm b/miniasm/test4.asm new file mode 100644 index 0000000..c181468 --- /dev/null +++ b/miniasm/test4.asm @@ -0,0 +1,17 @@ +; test4 - arithmetics + +begin: + mov ax, 5 + mov bx, 3 + add ax, bx + mov cx, 1 + sub ax, cx + or ax, 64 + mov dx, 8 + add dx, 19 + and dx, 15 + not dx + jmp end + +end: + hlt diff --git a/miniasm/test5.asm b/miniasm/test5.asm new file mode 100644 index 0000000..efa1d22 --- /dev/null +++ b/miniasm/test5.asm @@ -0,0 +1,19 @@ +; test5 - conditional jumps + +begin: + mov ax, 5 + mov bx, 5 + cmp ax, bx + je equals + jmp not_equals + +equals: + mov cx, 1 + jmp end + +not_equals: + mov cx, 0 + jmp end + +end: + hlt diff --git a/miniasm/test6.asm b/miniasm/test6.asm new file mode 100644 index 0000000..e452ec2 --- /dev/null +++ b/miniasm/test6.asm @@ -0,0 +1,12 @@ +; test6 - loop + +first: + mov ax, 10 + mov bx, 0 +loop: + add bx, 1 + cmp ax, bx + jne loop + +end: + hlt diff --git a/miniasm/test7.asm b/miniasm/test7.asm new file mode 100644 index 0000000..b822dea --- /dev/null +++ b/miniasm/test7.asm @@ -0,0 +1,12 @@ +; test7 - subroutines + +begin: + mov ax, 2 + mov bx, 3 + jsr add +end: + hlt + +add: + add ax, bx + ret diff --git a/miniasm/test8.asm b/miniasm/test8.asm new file mode 100644 index 0000000..f261f08 --- /dev/null +++ b/miniasm/test8.asm @@ -0,0 +1,25 @@ +; test8 - subroutines, saving registers to the stack + +begin: + mov ax, 1 + mov bx, 2 + mov cx, 3 + mov dx, 4 + 
jsr func +end: + hlt + +func: + push ax + push bx + push cx + push dx + mov ax, 10 + mov bx, 11 + mov cx, 12 + mov dx, 13 + pop dx + pop cx + pop bx + pop ax + ret |