summaryrefslogtreecommitdiff
path: root/miniasm
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2017-01-01 19:31:12 +0100
committerAndreas Baumann <mail@andreasbaumann.cc>2017-01-01 19:31:12 +0100
commit69fe7b182a1eedfb75c611f7dd35fa60200426f4 (patch)
tree329a0c6cc9b06c23d8782ece09f0f7dfa9b16b13 /miniasm
downloadcompilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.gz
compilertests-69fe7b182a1eedfb75c611f7dd35fa60200426f4.tar.bz2
initial checkin
Diffstat (limited to 'miniasm')
-rw-r--r--miniasm/README35
-rw-r--r--miniasm/const.h10
-rw-r--r--miniasm/main.c44
-rw-r--r--miniasm/optable.c56
-rw-r--r--miniasm/optable.h22
-rw-r--r--miniasm/parse.c543
-rw-r--r--miniasm/parse.h39
-rw-r--r--miniasm/scan.c199
-rw-r--r--miniasm/scan.h37
-rw-r--r--miniasm/test1.asm6
-rw-r--r--miniasm/test2.asm8
-rw-r--r--miniasm/test3.asm13
-rw-r--r--miniasm/test4.asm17
-rw-r--r--miniasm/test5.asm19
-rw-r--r--miniasm/test6.asm12
-rw-r--r--miniasm/test7.asm12
-rw-r--r--miniasm/test8.asm25
17 files changed, 1097 insertions, 0 deletions
diff --git a/miniasm/README b/miniasm/README
new file mode 100644
index 0000000..a7098f2
--- /dev/null
+++ b/miniasm/README
@@ -0,0 +1,35 @@
+Design
+------
+
+- one-pass: patching up addresses later or two-pass (scan location of all symbols first,
+ have them ready for the second pass), two-pass assemblers are easier to write and
+ relocation of addresses is possible in a deferred way, for instance at load time
+- intermediate listing-format
+- LC: location counter
+- extreme approach: encode the specific assembly language (e.g. register names) into
+ the grammar, another extreme approach: hand-coded parser to parse the lines. One
+ line == one instruction is something we usually don't like in a grammar ('\n' having
+ meaning in the grammar), on the other hand keywords in assembly ('INSTR MOV AX, B END')
+ look a little bit clumsy. Definitely when we have complex address calculations
+ we can benefit from a real parser (generated or not).
+- one vs. two-pass
+- labels and local labels (FUNC: and .loop and FUNC.loop)
+- LC-relative addressing $+5
+
+Building
+--------
+
+gcc -I../minilib -I../miniemu -g -O0 -m32 -march=i386 -ffreestanding -Werror -Wall -Wno-return-type -pedantic -std=c89 -o miniasm *.c ../minilib/*.c
+clang -I../minilib -I../miniemu -g -O0 -march=i386 -fno-builtin -std=c89 -Werror -Wall -Wno-return-type -o miniasm *.c ../minilib/*.c
+
+Usage
+-----
+
+./miniasm test1.asm ../miniemu/test1.bin
+./miniasm test2.asm ../miniemu/test2.bin
+./miniasm test3.asm ../miniemu/test3.bin
+./miniasm test4.asm ../miniemu/test4.bin
+./miniasm test5.asm ../miniemu/test5.bin
+./miniasm test6.asm ../miniemu/test6.bin
+./miniasm test7.asm ../miniemu/test7.bin
+./miniasm test8.asm ../miniemu/test8.bin
diff --git a/miniasm/const.h b/miniasm/const.h
new file mode 100644
index 0000000..6ccc38d
--- /dev/null
+++ b/miniasm/const.h
@@ -0,0 +1,10 @@
+#pragma once
+
+/* Size limits and defaults shared by the scanner and the parser. */
+enum {
+ MAX_LABEL_LEN = 12,
+ MAX_OP_LEN = 10,
+ MAX_IDENT_LEN = 12, /* size of the identifier buffers in Symbol and Operand */
+ MAX_TMP_LEN = 80, /* size of the temporary buffers used to build messages */
+ INIT_NOF_LABELS = 4, /* initial size handed to inthash_init for the label table */
+ DEFAULT_MEMORY_SIZE = 256 /* size of the code buffer that is allocated and written out */
+};
diff --git a/miniasm/main.c b/miniasm/main.c
new file mode 100644
index 0000000..53b01b9
--- /dev/null
+++ b/miniasm/main.c
@@ -0,0 +1,44 @@
+#include "stdlib.h"
+#include "const.h"
+#include "io.h"
+#include "arena.h"
+#include "scan.h"
+#include "hash.h"
+#include "opcodes.h"
+#include "optable.h"
+#include "parse.h"
+
+/*
+ * Entry point: assembles the source file named by argv[1] and writes
+ * the complete code buffer to the file named by argv[2].
+ *
+ * Returns 1 on wrong usage or when the source cannot be read; on the
+ * normal path the process ends through exit( 0 ).
+ */
+int main( int argc, char *argv[] )
+{
+    Parser parser;
+    Scanner scanner;
+    char *source;
+
+    if( argc != 3 ) {
+        print( "USAGE: miniasm <module.asm> <module.bin>" );
+        return 1;
+    }
+
+    source = readallfile( argv[1] );
+    if( source == 0 ) {
+        print( "UNABLE TO READ SOURCE FILE" );
+        return 1;
+    }
+
+    /* set up scanner and parser; only the parser traces its work */
+    scanner_init( &scanner, source );
+    parser_init( &parser, &scanner );
+    scanner_debug( &scanner, 0 );
+    parser_debug( &parser, 1 );
+
+    /* assemble, then dump the whole code buffer */
+    parser_parse( &parser );
+    writefile( argv[2], parser.code, DEFAULT_MEMORY_SIZE );
+
+    /* tear down in reverse order of construction */
+    parser_done( &parser );
+    scanner_done( &scanner );
+    deallocate( (void **)&source );
+
+    exit( 0 );
+}
diff --git a/miniasm/optable.c b/miniasm/optable.c
new file mode 100644
index 0000000..c569b74
--- /dev/null
+++ b/miniasm/optable.c
@@ -0,0 +1,56 @@
+#include "opcodes.h"
+#include "hash.h"
+#include "optable.h"
+
+/*
+ * All mnemonics known to the assembler. Each entry holds the mnemonic
+ * text, the opcode value it encodes to and the number of operands the
+ * parser collects for it.
+ */
+OpcodeInfo opcodeTable[NOF_OPCODES] = {
+ { "mov", OPCODE_MOV_MASK, 2 },
+ { "or", OPCODE_OR, 2 },
+ { "and", OPCODE_AND, 2 },
+ { "sub", OPCODE_SUB, 2 },
+ { "add", OPCODE_ADD, 2 },
+ { "cmp", OPCODE_CMP, 2 },
+ { "jmp", OPCODE_JMP | OPCODE_JMP_JMP, 1 },
+ { "je", OPCODE_JMP | OPCODE_JMP_JE, 1 },
+ { "jne", OPCODE_JMP | OPCODE_JMP_JNE, 1 },
+ { "ja", OPCODE_JMP | OPCODE_JMP_JA, 1 },
+ { "jae", OPCODE_JMP | OPCODE_JMP_JAE, 1 },
+ { "jb", OPCODE_JMP | OPCODE_JMP_JB, 1 },
+ { "jbe", OPCODE_JMP | OPCODE_JMP_JBE, 1 },
+ { "jsr", OPCODE_JMP | OPCODE_JMP_JSR, 1 },
+ { "not", OPCODE_NOT, 1 },
+ { "push", OPCODE_STACK | OPCODE_STACK_PUSH, 1 },
+ { "pop", OPCODE_STACK | OPCODE_STACK_POP, 1 },
+ { "nop", OPCODE_NOP, 0 },
+ { "hlt", OPCODE_HLT, 0 },
+ { "ret", OPCODE_RET, 0 }
+};
+
+intHashTable opcode_ht;
+
+/*
+ * Builds the mnemonic lookup table: every entry of opcodeTable is
+ * registered in opcode_ht under its mnemonic, the stored value being
+ * the entry's index in opcodeTable.
+ */
+void opcode_table_init( )
+{
+    int idx = 0;
+
+    inthash_init( &opcode_ht, NOF_OPCODES );
+
+    while( idx < NOF_OPCODES ) {
+        inthash_set( &opcode_ht, opcodeTable[idx].mnemonic, idx );
+        idx++;
+    }
+}
+
+/* Releases the mnemonic lookup table set up by opcode_table_init. */
+void opcode_table_done( )
+{
+ inthash_done( &opcode_ht );
+}
+
+/*
+ * Looks up a mnemonic in opcode_ht.
+ *
+ * Returns the matching opcodeTable entry, or 0 when the hash lookup
+ * yields a negative index.
+ */
+OpcodeInfo *lookup_opcode( char *mnemonic )
+{
+    int pos = inthash_get( &opcode_ht, mnemonic );
+
+    return ( pos >= 0 ) ? &opcodeTable[pos] : 0;
+}
diff --git a/miniasm/optable.h b/miniasm/optable.h
new file mode 100644
index 0000000..bd3c5c2
--- /dev/null
+++ b/miniasm/optable.h
@@ -0,0 +1,22 @@
+#pragma once
+
+/* Dimensions of the opcode table and of the operand array of the parser. */
+enum {
+ NOF_OPCODES = 20, /* number of entries in opcodeTable */
+ MAX_MNEMONIC_SIZE = 5, /* size of OpcodeInfo.mnemonic; "push" plus its terminator fills it */
+ MAX_OPERANDS = 2 /* size of the operand array in Parser */
+};
+
+/* One entry of the opcode table. */
+typedef struct OpcodeInfo
+{
+ char mnemonic[MAX_MNEMONIC_SIZE]; /* mnemonic text, used as the key in opcode_ht */
+ Opcode opcode; /* opcode value; the encoders OR operand bits into a copy of it */
+ int operands; /* number of operands the parser collects for this mnemonic */
+} OpcodeInfo;
+
+extern OpcodeInfo opcodeTable[NOF_OPCODES];
+
+extern intHashTable opcode_ht;
+
+extern void opcode_table_init( );
+extern void opcode_table_done( );
+extern OpcodeInfo *lookup_opcode( char *mnemonic );
diff --git a/miniasm/parse.c b/miniasm/parse.c
new file mode 100644
index 0000000..2a1c423
--- /dev/null
+++ b/miniasm/parse.c
@@ -0,0 +1,543 @@
+#include "const.h"
+#include "hash.h"
+#include "opcodes.h"
+#include "optable.h"
+#include "scan.h"
+#include "parse.h"
+#include "string.h"
+#include "io.h"
+#include "utils.h"
+#include "minilib.h"
+#include "arena.h"
+
+/*
+ * Initializes a parser that reads its symbols from scanner s.
+ *
+ * Every field of *p is given a defined value; debugging starts
+ * disabled and can be switched on with parser_debug. The code buffer
+ * of DEFAULT_MEMORY_SIZE bytes is cleared, because the complete buffer
+ * is written to the output file and bytes not covered by an
+ * instruction must not carry arbitrary content.
+ */
+void parser_init( Parser *p, Scanner *s )
+{
+    int i;
+
+    p->s = s;
+    p->debug = 0;
+    p->pass = 0;
+    p->LC = 0;
+    inthash_init( &p->labels, INIT_NOF_LABELS );
+    p->state = PARSE_LABEL_OR_OPCODE;
+    p->opcodeInfo = 0;
+    p->nof_operands = 0;
+    p->last_opcode = 0;
+
+    p->code = (char *)allocate( DEFAULT_MEMORY_SIZE );
+    if( p->code ) {
+        for( i = 0; i < DEFAULT_MEMORY_SIZE; i++ ) {
+            p->code[i] = 0;
+        }
+    }
+}
+
+/*
+ * Releases the resources acquired by parser_init: the code buffer and
+ * the label table.
+ */
+void parser_done( Parser *p )
+{
+    /* same cast as the deallocate call in main.c */
+    deallocate( (void **)&p->code );
+    inthash_done( &p->labels );
+}
+
+/*
+ * Switches the parser's trace output on (enable != 0) or off and
+ * announces it when it is switched on.
+ */
+void parser_debug( Parser *p, int enable )
+{
+    p->debug = enable;
+
+    if( !enable ) {
+        return;
+    }
+
+    print( "PARSER DEBUGGING ENABLED" );
+}
+
+/*
+ * Resolves a mnemonic to its opcode table entry.
+ *
+ * Returns the entry, or 0 after printing an error message when the
+ * mnemonic is unknown. The parser argument is not used.
+ */
+static OpcodeInfo *handle_opcode( Parser *p, char *mnemonic )
+{
+    char msg[MAX_TMP_LEN];
+    OpcodeInfo *info = lookup_opcode( mnemonic );
+
+    if( info ) {
+        return info;
+    }
+
+    msg[0] = '\0';
+    strcat( msg, "ERROR: ILLEGAL OPCODE '" );
+    strcat( msg, mnemonic );
+    strcat( msg, "'" );
+    print( msg );
+
+    return 0;
+}
+
+/*
+ * Records a label at the current location counter.
+ *
+ * A label that is not yet known is stored (under a copy of its name)
+ * with the value p->LC; a label that already exists is reported as a
+ * duplicate and left unchanged.
+ */
+static void remember_label( Parser *p, char *label )
+{
+    char msg[MAX_TMP_LEN];
+
+    if( inthash_get( &p->labels, label ) == -1 ) {
+        inthash_set( &p->labels, strdup( label ), p->LC );
+        return;
+    }
+
+    msg[0] = '\0';
+    strcat( msg, "ERROR: DUPLICATE LABEL '" );
+    strcat( msg, label );
+    strcat( msg, "'" );
+    print( msg );
+}
+
+/*
+ * Returns the value stored for a label in the label table, i.e.
+ * whatever inthash_get yields for it (callers treat -1 as "unknown").
+ */
+static int lookup_label( Parser *p, char *label )
+{
+    return inthash_get( &p->labels, label );
+}
+
+/*
+ * Prints a one-line description of an operand: its kind followed by
+ * the identifier text or the constant in hexadecimal. An operand of
+ * any other type prints an empty line.
+ */
+static void print_operand( Operand *o )
+{
+    char text[MAX_TMP_LEN];
+    char hex[MAX_TMP_LEN];
+
+    text[0] = '\0';
+
+    if( o->type == OPERAND_TYPE_IDENT ) {
+        strcat( text, "OPERAND_IDENT(" );
+        strcat( text, o->data.ident );
+        strcat( text, ")" );
+    } else if( o->type == OPERAND_TYPE_INT_CONST ) {
+        inttohex( o->data.int_const, hex );
+        strcat( text, "OPERAND_INT_CONST(" );
+        strcat( text, hex );
+        strcat( text, ")" );
+    }
+
+    print( text );
+}
+
+/*
+ * Returns 1 when s names one of the registers ax, bx, cx or dx
+ * (exact, case-sensitive match), 0 otherwise.
+ */
+static int is_register( char *s )
+{
+    if( strcmp( s, "ax" ) == 0 ) {
+        return 1;
+    }
+    if( strcmp( s, "bx" ) == 0 ) {
+        return 1;
+    }
+    if( strcmp( s, "cx" ) == 0 ) {
+        return 1;
+    }
+    if( strcmp( s, "dx" ) == 0 ) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Maps a register name to the opcode bits that select it as the
+ * source operand.
+ *
+ * Returns the OPCODE_SRC_*_REG value for ax, bx, cx or dx, and 0 (no
+ * bits to OR in) for any other name, so that the function has a
+ * defined result on every path.
+ */
+static int register_src_mask( char *s )
+{
+    if( strcmp( s, "ax" ) == 0 ) {
+        return OPCODE_SRC_AX_REG;
+    } else if( strcmp( s, "bx" ) == 0 ) {
+        return OPCODE_SRC_BX_REG;
+    } else if( strcmp( s, "cx" ) == 0 ) {
+        return OPCODE_SRC_CX_REG;
+    } else if( strcmp( s, "dx" ) == 0 ) {
+        return OPCODE_SRC_DX_REG;
+    }
+
+    /* not a register name */
+    return 0;
+}
+
+/*
+ * Maps a register name to the opcode bits that select it as the
+ * destination operand.
+ *
+ * Returns the OPCODE_DST_*_REG value for ax, bx, cx or dx, and 0 (no
+ * bits to OR in) for any other name, so that the function has a
+ * defined result on every path.
+ */
+static int register_dst_mask( char *s )
+{
+    if( strcmp( s, "ax" ) == 0 ) {
+        return OPCODE_DST_AX_REG;
+    } else if( strcmp( s, "bx" ) == 0 ) {
+        return OPCODE_DST_BX_REG;
+    } else if( strcmp( s, "cx" ) == 0 ) {
+        return OPCODE_DST_CX_REG;
+    } else if( strcmp( s, "dx" ) == 0 ) {
+        return OPCODE_DST_DX_REG;
+    }
+
+    /* not a register name */
+    return 0;
+}
+
+/*
+ * Encodes the address byte of a jump instruction at p->LC+1 (the
+ * opcode byte itself is stored by output_code).
+ *
+ * An identifier operand is resolved through the label table, but only
+ * in pass 2; an integer operand is taken as an absolute address.
+ * p->last_opcode is recorded before anything can fail, so that
+ * compute_codesize advances the location counter by the size of this
+ * jump even when the label is unknown. The address byte is stored only
+ * when it lies inside the code buffer.
+ */
+static void output_code_jmp( Parser *p )
+{
+    int addr = 0;
+    int have_addr = 0;
+    int opcode;
+
+    if( p->debug ) {
+        print( "ENCODING JMP" );
+        print_operand( &p->operand[0] );
+    }
+
+    opcode = p->opcodeInfo->opcode;
+    p->last_opcode = opcode;
+
+    switch( p->operand[0].type ) {
+        case OPERAND_TYPE_IDENT:
+            if( p->pass == 2 ) {
+                addr = lookup_label( p, p->operand[0].data.ident );
+                if( addr == -1 ) {
+                    char buf[MAX_TMP_LEN];
+                    *buf = '\0';
+                    strcat( buf, "ERROR: ILLEGAL LABEL IN JUMP '" );
+                    strcat( buf, p->operand[0].data.ident );
+                    strcat( buf, "'" );
+                    print( buf );
+                    return;
+                }
+                have_addr = 1;
+            }
+            break;
+
+        case OPERAND_TYPE_INT_CONST:
+            /* assuming an absolute constant address */
+            addr = p->operand[0].data.int_const;
+            have_addr = 1;
+            break;
+
+        default:
+            break;
+    }
+
+    if( have_addr ) {
+        if( p->LC + 1 < DEFAULT_MEMORY_SIZE ) {
+            p->code[p->LC+1] = addr;
+        } else {
+            print( "ERROR: CODE DOES NOT FIT INTO MEMORY" );
+        }
+    }
+}
+
+/*
+ * Encodes a NOT instruction at p->LC.
+ *
+ * A register operand contributes its source-register bits to the
+ * opcode; an integer operand is reported as an error. In every case
+ * the opcode byte is stored and remembered in p->last_opcode.
+ */
+static void output_code_not( Parser *p )
+{
+    Operand *arg = &p->operand[0];
+    int opcode = p->opcodeInfo->opcode;
+
+    if( p->debug ) {
+        print( "ENCODING NOT" );
+        print_operand( arg );
+    }
+
+    if( arg->type == OPERAND_TYPE_IDENT ) {
+        if( is_register( arg->data.ident ) ) {
+            opcode |= register_src_mask( arg->data.ident );
+        }
+    } else if( arg->type == OPERAND_TYPE_INT_CONST ) {
+        print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" );
+    }
+
+    p->code[p->LC] = opcode;
+    p->last_opcode = opcode;
+}
+
+/*
+ * Encodes a PUSH or POP instruction at p->LC.
+ *
+ * The operand must be a register; its source-register bits are OR-ed
+ * into the opcode. Any other operand (an integer or an identifier that
+ * is not a register) is reported as an error. In every case the opcode
+ * byte is stored and remembered in p->last_opcode, so the location
+ * counter keeps advancing consistently.
+ */
+static void output_code_stack( Parser *p )
+{
+    int opcode;
+
+    if( p->debug ) {
+        print( "ENCODING STACK" );
+        print_operand( &p->operand[0] );
+    }
+
+    opcode = p->opcodeInfo->opcode;
+
+    switch( p->operand[0].type ) {
+        case OPERAND_TYPE_IDENT:
+            if( is_register( p->operand[0].data.ident ) ) {
+                opcode |= register_src_mask( p->operand[0].data.ident );
+            } else {
+                print( "ERROR: PUSH AND POP EXPECT A REGISTER AS OPERAND" );
+            }
+            break;
+
+        default:
+            print( "ERROR: PUSH AND POP EXPECT A REGISTER AS OPERAND" );
+            break;
+    }
+
+    p->code[p->LC] = opcode;
+
+    p->last_opcode = opcode;
+}
+
+/*
+ * Encodes a two-operand instruction at p->LC; name is only used for
+ * the debug trace.
+ *
+ * Operand 0 is the destination: a register contributes its
+ * destination bits, an integer is reported as an error. Operand 1 is
+ * the source: a register contributes its source bits, an integer sets
+ * OPCODE_SRC_CONST and is stored in the byte following the opcode.
+ * Bytes are stored only when they lie inside the code buffer of
+ * DEFAULT_MEMORY_SIZE bytes. The final opcode is remembered in
+ * p->last_opcode for compute_codesize.
+ */
+static void handle_2op( Parser *p, char *name )
+{
+    int opcode;
+
+    if( p->debug ) {
+        char buf[MAX_TMP_LEN];
+        *buf = '\0';
+        strcat( buf, "ENCODING " );
+        strcat( buf, name );
+        print( buf );
+        print_operand( &p->operand[0] );
+        print_operand( &p->operand[1] );
+    }
+
+    opcode = p->opcodeInfo->opcode;
+
+    switch( p->operand[0].type ) {
+        case OPERAND_TYPE_IDENT:
+            if( is_register( p->operand[0].data.ident ) ) {
+                opcode |= register_dst_mask( p->operand[0].data.ident );
+            }
+            break;
+
+        case OPERAND_TYPE_INT_CONST:
+            print( "ERROR: FIRST OPERAND CANNOT BE AN INTEGER" );
+            break;
+    }
+
+    switch( p->operand[1].type ) {
+        case OPERAND_TYPE_IDENT:
+            if( is_register( p->operand[1].data.ident ) ) {
+                opcode |= register_src_mask( p->operand[1].data.ident );
+            }
+            break;
+
+        case OPERAND_TYPE_INT_CONST:
+            opcode |= OPCODE_SRC_CONST;
+            if( p->LC + 1 < DEFAULT_MEMORY_SIZE ) {
+                p->code[p->LC+1] = p->operand[1].data.int_const;
+            } else {
+                print( "ERROR: CODE DOES NOT FIT INTO MEMORY" );
+            }
+            break;
+    }
+
+    if( p->LC < DEFAULT_MEMORY_SIZE ) {
+        p->code[p->LC] = opcode;
+    }
+
+    p->last_opcode = opcode;
+}
+
+/*
+ * Encoders for the two-operand instructions. Each one forwards to
+ * handle_2op; the string is only the name shown in the debug trace.
+ */
+static void output_code_mov( Parser *p )
+{
+ handle_2op( p, "MOV" );
+}
+
+static void output_code_or( Parser *p )
+{
+ handle_2op( p, "OR" );
+}
+
+static void output_code_and( Parser *p )
+{
+ handle_2op( p, "AND" );
+}
+
+static void output_code_add( Parser *p )
+{
+ handle_2op( p, "ADD" );
+}
+
+static void output_code_cmp( Parser *p )
+{
+ handle_2op( p, "CMP" );
+}
+
+static void output_code_sub( Parser *p )
+{
+ handle_2op( p, "SUB" );
+}
+
+/*
+ * Emits the instruction described by p->opcodeInfo and the collected
+ * operands at the current location counter.
+ *
+ * The plain opcode byte is stored first; the encoder selected by the
+ * opcode group then refines it (register bits, constant or address
+ * byte). Instructions without operands are complete after the first
+ * store. Nothing is emitted, and an error is printed, when the
+ * location counter has run past the end of the code buffer.
+ */
+static void output_code( Parser *p )
+{
+    int opcode = p->opcodeInfo->opcode;
+    int two_op = opcode & OPCODE_GROUP_TWO_OPERANDS;
+    int one_op = opcode & OPCODE_GROUP_ONE_OPERAND;
+
+    if( p->LC >= DEFAULT_MEMORY_SIZE ) {
+        print( "ERROR: CODE DOES NOT FIT INTO MEMORY" );
+        return;
+    }
+
+    p->code[p->LC] = opcode;
+
+    if( two_op == OPCODE_MOV_MASK ) {
+        output_code_mov( p );
+    } else if( two_op == OPCODE_OR ) {
+        output_code_or( p );
+    } else if( two_op == OPCODE_AND ) {
+        output_code_and( p );
+    } else if( two_op == OPCODE_CMP ) {
+        output_code_cmp( p );
+    } else if( two_op == OPCODE_SUB ) {
+        output_code_sub( p );
+    } else if( two_op == OPCODE_ADD ) {
+        output_code_add( p );
+    } else if( one_op == OPCODE_JMP ) {
+        output_code_jmp( p );
+    } else if( one_op == OPCODE_NOT ) {
+        output_code_not( p );
+    } else if( one_op == OPCODE_STACK ) {
+        output_code_stack( p );
+    } else if( ( ~opcode & OPCODE_ZERO_OPS_MASK ) == OPCODE_ZERO_OPS_MASK ) {
+        /* done above */
+    } else {
+        print( "ERROR: ILLEGAL OUTPUT CODE" );
+    }
+}
+
+/*
+ * Returns the size in bytes (1 or 2) of the instruction whose final
+ * opcode is stored in p->last_opcode.
+ *
+ * Two-operand instructions take 2 bytes when their source is a
+ * constant or a memory operand; in the one-operand group only jumps
+ * take 2 bytes; everything else takes 1 byte. The size is traced only
+ * when debugging is enabled, like all other parser traces.
+ */
+static int compute_codesize( Parser *p )
+{
+    int size = 1;
+    int src;
+
+    if( p->last_opcode & OPCODE_GROUP_TWO_OPERANDS ) {
+        src = p->last_opcode & OPCODE_SRC_MASK;
+        if( src == OPCODE_SRC_CONST || src == OPCODE_SRC_MEMORY ) {
+            size = 2;
+        }
+    } else if( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) {
+        if( ( p->last_opcode & OPCODE_GROUP_ONE_OPERAND ) == OPCODE_JMP ) {
+            size = 2;
+        }
+    }
+
+    if( p->debug ) {
+        print( ( size == 2 ) ? "CODESIZE 2" : "CODESIZE 1" );
+    }
+
+    return size;
+}
+
+/*
+ * Runs one assembler pass over all symbols delivered by p->s.
+ *
+ * The pass number is stored in p->pass. Labels are recorded in pass 1
+ * only; instructions are encoded in both passes so that the location
+ * counter advances identically (jump targets given as labels are
+ * resolved in pass 2 by output_code_jmp).
+ *
+ * Per symbol:
+ * - comment:    the rest of the line is skipped
+ * - label:      remembered (pass 1), an opcode is expected next
+ * - identifier: the opcode of the line, or one of its operands
+ * - number:     an operand; an error at the start of a line
+ * - newline:    the collected instruction is emitted
+ * Other symbols (S_token) are ignored.
+ *
+ * An operand that arrives while no valid opcode is known is dropped;
+ * the error that led there has already been printed. An instruction
+ * that is still pending at end of input (last line without a newline)
+ * is emitted as well.
+ */
+static void pass( Parser *p, int pass )
+{
+    Symbol s;
+
+    p->pass = pass;
+
+    if( p->debug ) {
+        char buf[MAX_TMP_LEN];
+        char buf2[MAX_TMP_LEN];
+        inttohex( pass, buf2 );
+        *buf = '\0';
+        strcat( buf, "PASS " );
+        strcat( buf, buf2 );
+        print( buf );
+    }
+
+    s = scanner_scan( p->s );
+
+    while( s.sym != S_eof ) {
+        if( s.sym == S_comment ) {
+            scanner_skip_line( p->s );
+            if( p->debug ) {
+                print( "PARSER(COMMENT)" );
+            }
+            p->state = PARSE_LABEL_OR_OPCODE;
+        } else if( s.sym == S_label ) {
+            if( p->debug ) {
+                char buf[MAX_TMP_LEN];
+                *buf = '\0';
+                strcat( buf, "LABEL(" );
+                strcat( buf, s.data.s );
+                strcat( buf, ")" );
+                print( buf );
+            }
+            if( pass == 1 ) {
+                remember_label( p, s.data.s );
+            }
+            p->state = PARSE_OPCODE;
+        } else if( s.sym == S_ident ) {
+            char *ident = s.data.s;
+            char buf[MAX_TMP_LEN];
+
+            switch( p->state ) {
+                case PARSE_LABEL_OR_OPCODE:
+                case PARSE_OPCODE:
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPCODE(" );
+                        strcat( buf, ident );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->opcodeInfo = handle_opcode( p, ident );
+                    if( p->opcodeInfo ) {
+                        if( p->opcodeInfo->operands > 0 ) {
+                            p->state = PARSE_OPERAND;
+                            p->nof_operands = 0;
+                        }
+                    }
+                    break;
+
+                case PARSE_OPERAND:
+                    if( !p->opcodeInfo ) {
+                        /* operand without a valid opcode, already reported */
+                        break;
+                    }
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPERAND(" );
+                        strcat( buf, ident );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->operand[p->nof_operands].type = OPERAND_TYPE_IDENT;
+                    strcpy( p->operand[p->nof_operands].data.ident, ident );
+                    p->nof_operands++;
+                    if( p->nof_operands >= p->opcodeInfo->operands ) {
+                        p->state = PARSE_LABEL_OR_OPCODE;
+                        p->nof_operands = 0;
+                    }
+                    break;
+
+                default:
+                    print( "ERROR: ILLEGAL PARSE STATE" );
+                    halt( );
+            }
+        } else if( s.sym == S_number ) {
+            char buf[MAX_TMP_LEN];
+            char buf2[MAX_TMP_LEN];
+            int int_const = s.data.n;
+
+            switch( p->state ) {
+                case PARSE_LABEL_OR_OPCODE:
+                case PARSE_OPCODE:
+                    print( "ERROR: EXPECTED IDENTIFIER FOR LABEL OR OPCODE" );
+                    p->state = PARSE_OPERAND;
+                    p->nof_operands = 0;
+                    break;
+
+                case PARSE_OPERAND:
+                    if( !p->opcodeInfo ) {
+                        /* operand without a valid opcode, already reported */
+                        break;
+                    }
+                    if( p->debug ) {
+                        *buf = '\0';
+                        strcat( buf, "OPERAND(" );
+                        inttohex( int_const, buf2 );
+                        strcat( buf, buf2 );
+                        strcat( buf, ")" );
+                        print( buf );
+                    }
+                    p->operand[p->nof_operands].type = OPERAND_TYPE_INT_CONST;
+                    p->operand[p->nof_operands].data.int_const = int_const;
+                    p->nof_operands++;
+                    if( p->nof_operands >= p->opcodeInfo->operands ) {
+                        p->state = PARSE_LABEL_OR_OPCODE;
+                        p->nof_operands = 0;
+                    }
+                    break;
+
+                default:
+                    print( "ERROR: ILLEGAL PARSE STATE" );
+                    halt( );
+            }
+        } else if( s.sym == S_newline ) {
+            if( p->opcodeInfo ) {
+                output_code( p );
+                p->LC += compute_codesize( p );
+            }
+            p->state = PARSE_LABEL_OR_OPCODE;
+            p->opcodeInfo = 0;
+            p->nof_operands = 0;
+        }
+        s = scanner_scan( p->s );
+    }
+
+    /* emit an instruction on a last line that has no trailing newline */
+    if( p->opcodeInfo ) {
+        output_code( p );
+        p->LC += compute_codesize( p );
+    }
+    p->state = PARSE_LABEL_OR_OPCODE;
+    p->opcodeInfo = 0;
+    p->nof_operands = 0;
+}
+
+/*
+ * Prints the label table: a heading followed by one line per label
+ * giving its name and its value in hexadecimal.
+ */
+static void print_labels( Parser *p )
+{
+    intHashIterator it;
+    intHashEntry *entry;
+
+    print( "LABELS:" );
+
+    for( entry = inthash_getfirst( &p->labels, &it );
+         entry;
+         entry = inthash_getnext( &it ) ) {
+        char line[MAX_TMP_LEN];
+        char hex[MAX_TMP_LEN];
+
+        inttohex( entry->value, hex );
+
+        line[0] = '\0';
+        strcat( line, " " );
+        strcat( line, entry->key );
+        strcat( line, " = " );
+        strcat( line, hex );
+        print( line );
+    }
+}
+
+/*
+ * Assembles the source of the parser's scanner in two passes.
+ * Pass 1 records the labels, pass 2 runs over the same input again
+ * with the label table filled. The opcode lookup table exists only
+ * for the duration of this call.
+ */
+void parser_parse( Parser *p )
+{
+ opcode_table_init( );
+
+ pass( p, 1 );
+
+ if( p->debug ) {
+ print_labels( p );
+ }
+
+ /* rewind the input and the location counter for the second pass */
+ scanner_reset( p->s );
+ p->LC = 0;
+ pass( p, 2 );
+
+ opcode_table_done( );
+}
diff --git a/miniasm/parse.h b/miniasm/parse.h
new file mode 100644
index 0000000..4187f29
--- /dev/null
+++ b/miniasm/parse.h
@@ -0,0 +1,39 @@
+#pragma once
+
+/* What the parser expects next on the current line. */
+typedef enum ParseState {
+ PARSE_LABEL_OR_OPCODE, /* state at the start of a line */
+ PARSE_OPCODE, /* state after a label has been seen */
+ PARSE_OPERAND /* state while the operands of an opcode are collected */
+} ParseState;
+
+/* Selects which member of Operand.data is valid. */
+typedef enum OperandType {
+ OPERAND_TYPE_IDENT, /* data.ident holds an identifier */
+ OPERAND_TYPE_INT_CONST /* data.int_const holds a number */
+} OperandType;
+
+/* One operand of an instruction: an identifier or an integer constant. */
+typedef struct Operand {
+ OperandType type; /* tells which member of data is in use */
+ union {
+ char ident[MAX_IDENT_LEN]; /* identifier text, NUL-terminated */
+ int int_const; /* value of a numeric operand */
+ } data;
+} Operand;
+
+/* State of the assembler's parser. */
+typedef struct Parser {
+ Scanner *s; /* scanner delivering the symbols */
+ int debug; /* non-zero enables trace output */
+ intHashTable labels; /* label name -> location counter at its definition */
+ int LC; /* location counter: index into code of the current instruction */
+ ParseState state; /* what is expected next on the current line */
+ struct OpcodeInfo *opcodeInfo; /* opcode of the current line, 0 if none */
+ int nof_operands; /* number of operands collected so far */
+ Operand operand[MAX_OPERANDS]; /* operands of the current instruction */
+ char *code; /* output buffer of DEFAULT_MEMORY_SIZE bytes */
+ int last_opcode; /* opcode as last encoded, read by compute_codesize */
+ int pass; /* number of the pass currently running */
+} Parser;
+
+extern void parser_init( Parser *p, Scanner *s );
+extern void parser_done( Parser *p );
+extern void parser_debug( Parser *p, int enable );
+extern void parser_parse( Parser *p );
diff --git a/miniasm/scan.c b/miniasm/scan.c
new file mode 100644
index 0000000..ab8dffb
--- /dev/null
+++ b/miniasm/scan.c
@@ -0,0 +1,199 @@
+#include "const.h"
+#include "scan.h"
+#include "ctype.h"
+#include "string.h"
+#include "utils.h"
+#include "io.h"
+
+/*
+ * Attaches the scanner to the NUL-terminated source text src and
+ * positions it at its beginning. The text is not copied.
+ */
+void scanner_init( Scanner *s, char *src )
+{
+ s->src = src;
+ scanner_reset( s );
+}
+
+/*
+ * Rewinds the scanner to the start of its source text: position at
+ * the first character, row and column 1, and a blank as the pending
+ * character so that the next scan starts by skipping whitespace.
+ */
+void scanner_reset( Scanner *s )
+{
+    s->pos = s->src;
+    s->row = 1;
+    s->col = 1;
+    s->peek = ' ';
+}
+
+/* Counterpart of scanner_init; the scanner owns nothing, so there is nothing to release. */
+void scanner_done( Scanner *s )
+{
+}
+
+/*
+ * Returns the character at the current source position and advances
+ * the position by one.
+ */
+static char get_char( Scanner *s )
+{
+    return *s->pos++;
+}
+
+/*
+ * Skips blanks and tabs, starting with the pending character s->peek.
+ * On return s->peek holds the first character that is not a blank or
+ * tab (possibly '\n' or '\0'). The column is incremented for every
+ * character examined; a newline increments the row and resets the
+ * column to 1.
+ */
+static void skip_whitespace( Scanner *s )
+{
+ /* the next character is fetched only when the loop continues */
+ for( ; ; s->peek = get_char( s ) ) {
+ s->col++;
+ if( s->peek == ' ' || s->peek == '\t' ) {
+ continue;
+ } else if( s->peek == '\n' ) {
+ s->row++;
+ s->col = 1;
+ break;
+ } else if( s->peek == '\0' ) {
+ break;
+ } else {
+ break;
+ }
+ }
+}
+
+/*
+ * Advances the scanner to the end of the current line. On return
+ * s->peek is the terminating '\n' (left for the next scan) or '\0'
+ * at the end of the source.
+ */
+void scanner_skip_line( Scanner *s )
+{
+    for( ; s->peek != '\0' && s->peek != '\n'; s->peek = get_char( s ) ) {
+        s->col++;
+    }
+
+    if( s->peek == '\n' ) {
+        s->col++;
+    }
+}
+
+/*
+ * Scans a decimal number whose first digit is the pending character
+ * s->peek. Returns an S_number symbol carrying the value; s->peek is
+ * left on the first character after the number.
+ */
+Symbol get_int( Scanner *s )
+{
+    Symbol sym;
+    int value = 0;
+
+    do {
+        value = value * 10 + ( s->peek - '0' );
+        s->peek = get_char( s );
+    } while( ( s->peek != '\0' ) && isdigit( s->peek ) );
+
+    sym.sym = S_number;
+    sym.data.n = value;
+
+    return sym;
+}
+
+/*
+ * Scans an identifier whose first character is the pending character
+ * s->peek; further characters may be letters, digits or '_'.
+ *
+ * Returns an S_ident symbol, or an S_label symbol when the identifier
+ * is directly followed by ':' (the colon is consumed). At most
+ * MAX_IDENT_LEN - 1 characters are stored so that the terminating NUL
+ * always fits into the buffer; excess characters of an overlong
+ * identifier are consumed and dropped rather than being scanned as a
+ * separate symbol.
+ */
+Symbol get_ident_or_label( Scanner *s )
+{
+    Symbol newSym;
+    int len = 0;
+
+    newSym.sym = S_ident;
+
+    do {
+        if( len < MAX_IDENT_LEN - 1 ) {
+            newSym.data.s[len] = s->peek;
+            len++;
+        }
+        s->peek = get_char( s );
+    } while( isalnum( s->peek ) || s->peek == '_' );
+
+    newSym.data.s[len] = '\0';
+
+    if( s->peek == ':' ) {
+        newSym.sym = S_label;
+        s->peek = get_char( s );
+    }
+
+    return newSym;
+}
+
+/*
+ * Returns the next symbol of the source.
+ *
+ * Leading blanks and tabs are skipped. The result is S_eof at the end
+ * of the text, S_number for a decimal number, S_ident or S_label for
+ * an identifier, S_newline for a line break, S_comment for ';' (the
+ * caller skips the rest of the line) and S_token, with the character
+ * in tag, for anything else. With debugging enabled every symbol is
+ * traced.
+ */
+Symbol scanner_scan( Scanner *s )
+{
+    Symbol sym;
+    char msg[MAX_TMP_LEN];
+    char hex[MAX_TMP_LEN];
+
+    /* at end of input there is nothing left to skip */
+    if( s->peek != '\0' ) {
+        skip_whitespace( s );
+    }
+
+    if( s->peek == '\0' ) {
+        sym.sym = S_eof;
+        if( s->debug ) {
+            print( "SCANNER(EOF)" );
+        }
+    } else if( isdigit( s->peek ) ) {
+        sym = get_int( s );
+        if( s->debug ) {
+            inttohex( sym.data.n, hex );
+            msg[0] = '\0';
+            strcat( msg, "SCANNER(NUMBER," );
+            strcat( msg, hex );
+            strcat( msg, ")" );
+            print( msg );
+        }
+    } else if( isalpha( s->peek ) ) {
+        sym = get_ident_or_label( s );
+        if( s->debug ) {
+            msg[0] = '\0';
+            switch( sym.sym ) {
+                case S_ident:
+                    strcat( msg, "SCANNER(IDENT," );
+                    break;
+                case S_label:
+                    strcat( msg, "SCANNER(LABEL," );
+                    break;
+                default:
+                    strcat( msg, "SCANNER(<unknown>," );
+                    break;
+            }
+            strcat( msg, sym.data.s );
+            strcat( msg, ")" );
+            print( msg );
+        }
+    } else if( s->peek == '\n' ) {
+        sym.sym = S_newline;
+        if( s->debug ) {
+            print( "SCANNER(NEWLINE)" );
+        }
+        s->peek = ' ';
+    } else if( s->peek == ';' ) {
+        sym.sym = S_comment;
+        if( s->debug ) {
+            print( "SCANNER(COMMENT)" );
+        }
+        s->peek = ' ';
+    } else {
+        sym.sym = S_token;
+        sym.tag = s->peek;
+        s->peek = ' ';
+        if( s->debug ) {
+            inttohex( sym.tag, hex );
+            msg[0] = '\0';
+            strcat( msg, "SCANNER(TOKEN," );
+            strcat( msg, hex );
+            strcat( msg, ")" );
+            print( msg );
+        }
+    }
+
+    return sym;
+}
+
+/*
+ * Switches the scanner's trace output on (enable != 0) or off and
+ * announces it when it is switched on.
+ */
+void scanner_debug( Scanner *s, int enable )
+{
+    s->debug = enable;
+
+    if( !enable ) {
+        return;
+    }
+
+    print( "SCANNER DEBUGGING ENABLED" );
+}
+
diff --git a/miniasm/scan.h b/miniasm/scan.h
new file mode 100644
index 0000000..c4d3b9d
--- /dev/null
+++ b/miniasm/scan.h
@@ -0,0 +1,37 @@
+#pragma once
+
+/* Kinds of symbols delivered by the scanner. */
+typedef enum Sym {
+ S_undef,
+ S_number, /* decimal number, value in Symbol.data.n */
+ S_ident, /* identifier, text in Symbol.data.s */
+ S_token, /* any other single character, kept in Symbol.tag */
+ S_newline, /* line break */
+ S_label, /* identifier directly followed by ':' */
+ S_comment, /* ';' starting a comment */
+ S_eof /* end of the source text */
+} Sym;
+
+/* A scanned symbol together with its payload. */
+typedef struct Symbol {
+ Sym sym; /* kind of the symbol */
+ union {
+ char s[MAX_IDENT_LEN]; /* text of an S_ident or S_label */
+ int n; /* value of an S_number */
+ } data;
+ int tag; /* character of an S_token; not set for other kinds */
+} Symbol;
+
+/* State of the scanner. */
+typedef struct Scanner {
+ int peek; /* pending character that the next scan looks at first */
+ int row; /* row counter, starts at 1 */
+ int col; /* column counter, starts at 1 */
+ char *src; /* start of the NUL-terminated source text */
+ char *pos; /* next character to be read from the source text */
+ int debug; /* non-zero enables trace output */
+} Scanner;
+
+extern void scanner_init( Scanner *s, char *src );
+extern void scanner_reset( Scanner *s );
+extern void scanner_done( Scanner *s );
+extern Symbol scanner_scan( Scanner *s );
+extern void scanner_debug( Scanner *s, int enable );
+extern void scanner_skip_line( Scanner *s );
diff --git a/miniasm/test1.asm b/miniasm/test1.asm
new file mode 100644
index 0000000..6d5cc90
--- /dev/null
+++ b/miniasm/test1.asm
@@ -0,0 +1,6 @@
+; test1 - halt
+
+begin:
+ hlt ; halt the processor
+end:
+
diff --git a/miniasm/test2.asm b/miniasm/test2.asm
new file mode 100644
index 0000000..6d09afc
--- /dev/null
+++ b/miniasm/test2.asm
@@ -0,0 +1,8 @@
+; test2 - nops
+
+begin:
+ nop
+ nop
+ nop
+end: hlt
+
diff --git a/miniasm/test3.asm b/miniasm/test3.asm
new file mode 100644
index 0000000..97144a3
--- /dev/null
+++ b/miniasm/test3.asm
@@ -0,0 +1,13 @@
+; test3 - unconditional jumps
+
+begin:
+ jmp label1
+
+label2:
+ jmp 6
+
+label1:
+ jmp label2
+
+; this is at absolute position 6
+ hlt
diff --git a/miniasm/test4.asm b/miniasm/test4.asm
new file mode 100644
index 0000000..c181468
--- /dev/null
+++ b/miniasm/test4.asm
@@ -0,0 +1,17 @@
+; test4 - arithmetics
+
+begin:
+ mov ax, 5
+ mov bx, 3
+ add ax, bx
+ mov cx, 1
+ sub ax, cx
+ or ax, 64
+ mov dx, 8
+ add dx, 19
+ and dx, 15
+ not dx
+ jmp end
+
+end:
+ hlt
diff --git a/miniasm/test5.asm b/miniasm/test5.asm
new file mode 100644
index 0000000..efa1d22
--- /dev/null
+++ b/miniasm/test5.asm
@@ -0,0 +1,19 @@
+; test5 - conditional jumps
+
+begin:
+ mov ax, 5
+ mov bx, 5
+ cmp ax, bx
+ je equals
+ jmp not_equals
+
+equals:
+ mov cx, 1
+ jmp end
+
+not_equals:
+ mov cx, 0
+ jmp end
+
+end:
+ hlt
diff --git a/miniasm/test6.asm b/miniasm/test6.asm
new file mode 100644
index 0000000..e452ec2
--- /dev/null
+++ b/miniasm/test6.asm
@@ -0,0 +1,12 @@
+; test6 - loop
+
+first:
+ mov ax, 10
+ mov bx, 0
+loop:
+ add bx, 1
+ cmp ax, bx
+ jne loop
+
+end:
+ hlt
diff --git a/miniasm/test7.asm b/miniasm/test7.asm
new file mode 100644
index 0000000..b822dea
--- /dev/null
+++ b/miniasm/test7.asm
@@ -0,0 +1,12 @@
+; test7 - subroutines
+
+begin:
+ mov ax, 2
+ mov bx, 3
+ jsr add
+end:
+ hlt
+
+add:
+ add ax, bx
+ ret
diff --git a/miniasm/test8.asm b/miniasm/test8.asm
new file mode 100644
index 0000000..f261f08
--- /dev/null
+++ b/miniasm/test8.asm
@@ -0,0 +1,25 @@
+; test8 - subroutines, saving registers to the stack
+
+begin:
+ mov ax, 1
+ mov bx, 2
+ mov cx, 3
+ mov dx, 4
+ jsr func
+end:
+ hlt
+
+func:
+ push ax
+ push bx
+ push cx
+ push dx
+ mov ax, 10
+ mov bx, 11
+ mov cx, 12
+ mov dx, 13
+ pop dx
+ pop cx
+ pop bx
+ pop ax
+ ret