/*
 * minimalistic assembler for IA-32, i386
 *
 * This assembler is a one pass assembler as we do not want to have
 * to implement re-seekable files (neither input nor output).
 *
 * We only implement the absolute minimal opcodes and addressing modes.
 *
 * B8+r XX XX XX XX  mov r32, imm32
 * 50+r              push r32
 * 58+r              pop r32
 * A3 XX XX XX XX    mov moffs32, eax
 * A2 XX XX XX XX    mov moffs8, al
 * A1 XX XX XX XX    mov eax, moffs32
 * A0 XX XX XX XX    mov al, moffs8
 * 89 rr             mov [r32], eax
 * 88 rr             mov [r32], al
 * 8A rr             mov al, [r32]
 * 8B rr             mov eax, [r32]
 * 01 XX             add r32, r32
 * 29 XX             sub r32, r32
 * F7 E3             mul ebx
 * F7 F3             div ebx
 * 39 XX             cmp r32, r32
 * 74 XX             je rel8
 * 75 XX             jne rel8
 * 72 XX             jb rel8
 * 76 XX             jbe rel8
 * 77 XX             ja rel8
 * 73 XX             jae rel8
 * EB XX             jmp rel8
 * E9 XX XX XX XX    jmp rel32
 * E8 XX XX XX XX    call rel32
 * C3                ret
 * C2 XX XX          ret imm16
 * F4                hlt
 * CD XX             int nnn
 * 90                nop
 *
 * r32/rr
 *   eax 000
 *   ecx 001
 *   edx 010
 *   ebx 011
 *
 * imm32
 *   little endian 32-bit constant
 *
 * moffs32
 *   offset to DS or SS
 *
 * rel8
 * rel32
 *   relative address offset (8/32 bits)
 *
 * imm16
 *   little endian 16-bit constant
 *
 * format binary
 * use32
 * org 0x00000000
 * dd 0x00000000 (1 to N arguments), integer only
 * dw 0x0000 (1 to N arguments), integer only
 * db 0x00 (1 to N arguments), combinable with string literals
 * db "abcd"
 */

/* constants */

/* size limits of the scanner and the parser */
enum {
	MAX_IDENT_LEN = 64,
	MAX_NUMBER_LEN = 10,
	MAX_HEXNUMBER_LEN = 8,
	MAX_STRING_LEN = 64,
	MAX_PASSES = 10,
	MAX_NOF_OPERANDS = 64
};

/* tracing switches, a non-zero value enables output on stderr */
static int DEBUG_GETCHAR = 0;
static int DEBUG_SCANNER = 0;
static int DEBUG_PARSER = 0;

/* scanner */

/* the symbols (tokens) of the assembler language, S_eof must stay the
 * last one as it determines the size of 'symname' */
typedef enum {
	S_format,
	S_binary,
	S_use32,
	S_newline,
	S_org,
	S_ident,
	S_number,
	S_string,
	S_colon,
	S_comma,
	S_lbrak,
	S_rbrak,
	S_plus,
	S_minus,
	S_star,
	S_slash,
	S_equals,
	S_current_org,
	S_current_addr,
	S_eof
} S_Symbol;

/* printable names of the symbols, indexed by S_Symbol */
static char *symname[S_eof + 1] = {
	"format",
	"binary",
	"use32",
	"newline",
	"org",
	"ident",
	"number",
	"string",
	":",
	",",
	"[",
	"]",
	"+",
	"-",
	"*",
	"/",
	"=",
	"$$",
	"$",
	"eof"
};

/* current input position (for error messages) and lookahead character */
static int col;
static int row;
static int look;
static S_Symbol sym; static char ident[MAX_IDENT_LEN+1]; static int num; static char str[MAX_STRING_LEN+1]; static S_Symbol peek_sym = S_eof; static char peek_ident[MAX_IDENT_LEN+1]; static int peek_num; static void Err( char *s, va_list args ) { fprintf( stderr, "Error line %d, pos %d: ", row, col ); vfprintf( stderr, s, args ); fputs( "\n", stderr ); fflush( stderr ); } static void Halt( int code ) { exit( code ); } static void Abort( char *s, ... ) { va_list args; va_start( args, s ); Err( s, args ); va_end( args ); Halt( EXIT_FAILURE ); } static void *Allocate( unsigned int size ) { char *p; p = malloc( size ); if( p == NULL ) { Abort( "Out of memory" ); } return p; } static char *AllocateAndCopyStr( char *s ) { char *d; int len = strlen( s ); if( len > MAX_STRING_LEN ) { Abort( "Too long string literal, should not happen" ); } d = (char *)Allocate( len + 1 ); strlcpy( d, s, MAX_STRING_LEN ); return d; } static int getChar( void ) { int c; c = getchar( ); if( DEBUG_GETCHAR ) { if( c == '\n' ) { fprintf( stderr, "getchar -> '\\n'\n" ); } else if( c == EOF ) { fprintf( stderr, "getchar -> 'EOF'\n" ); } else { fprintf( stderr, "getchar -> '%c'\n", c ); } } if( c == EOF ) { return c; } col++; if( c == '\n' ) { col = 1; row++; } return c; } static void ungetChar( int c ) { ungetc( c, stdin ); } static int isWhite( int c ) { if( c == ' ' || c == '\r' || c == '\t' ) return 1; return 0; } static int isAlpha( int c ) { if( ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ) ) return 1; return 0; } static int isDigit( int c ) { if( ( c >= '0' && c <= '9' ) ) return 1; return 0; } static int isHexDigit( int c ) { if( ( c >= '0' && c <= '9' ) || ( c >= 'a' && c <= 'f' ) || ( c >= 'A' && c <= 'F' ) ) return 1; return 0; } static int isSpecial( int c ) { switch( c ) { case '_': case ' ': return 1; default: return 0; } } static void skipWhite( void ) { while( isWhite( look ) ) { look = getChar( ); } } static void number( void ) { int n = 0; int negative = 0; if( look == '-' 
) { negative = 1; look = getChar( ); } if( isDigit( look ) ) { num = look - '0'; look = getChar( ); while( isDigit( look ) && n < MAX_NUMBER_LEN ) { n++; num = 10 * num + ( look - '0' ); look = getChar( ); } if( n == MAX_NUMBER_LEN ) { Abort( "Number exceeds maximal length" ); } sym = S_number; } if( negative ) { num = ~num + 1; } } static void hexnumber( void ) { int n = 0; look = getChar( ); if( isHexDigit( look ) ) { if( isDigit( look ) ) { num = look - '0'; } else if( look >= 'a' && look <= 'f' ) { num = 10 + look - 'a'; } else { num = 10 + look - 'A'; } look = getChar( ); while( isHexDigit( look ) && n < MAX_HEXNUMBER_LEN ) { n++; if( isDigit( look ) ) { num = 16 * num + ( look - '0' ); } else if( look >= 'a' && look <= 'f' ) { num = 16 * num + ( 10 + look - 'a' ); } else { num = 16 * num + ( 10 + look - 'A' ); } look = getChar( ); } if( n == MAX_HEXNUMBER_LEN ) { Abort( "Hexadecimal number exceeds maximal length" ); } sym = S_number; } } static void string( void ) { int n = 0; if( look == '"' ) { look = getChar( ); while( ( isDigit( look ) || isAlpha( look ) || isSpecial( look ) ) && n < MAX_STRING_LEN ) { str[n] = look; n++; look = getChar( ); } if( n == MAX_STRING_LEN ) { Abort( "String exceeds maximal length" ); } str[n] = '\0'; if( look != '\"' ) { Abort( "Unterminated string literal" ); } look = getChar( ); } } static void identifier( void ) { int n = 0; if( isAlpha( look ) || ( look == '_' ) ) { ident[n] = look; n++; look = getChar( ); while( ( isAlpha( look ) || isDigit( look ) || ( look == '_' ) ) && n < MAX_IDENT_LEN ) { ident[n] = look; n++; look = getChar( ); } ident[n] = '\0'; if( n == MAX_IDENT_LEN ) { Abort( "Identifier exceeds maximal length" ); } sym = S_ident; } } static void skipEolComment( void ) { look = getChar( ); while( look != '\n' ) { look = getChar( ); } } static S_Symbol getSym( void ) { S_Symbol s = S_eof; if( peek_sym != S_eof ) { s = peek_sym; if( s == S_ident ) { strlcpy( ident, peek_ident, MAX_IDENT_LEN ); } else { num = 
peek_num; } peek_sym = S_eof; return s; } skipWhite( ); switch( look ) { case '\n': look = getChar( ); s = S_newline; break; case '$': look = getChar( ); if( look == '$' ) { look = getChar( ); s = S_current_org; } else if( ( look >= '0' && look <= '9' ) || ( look >= 'a' && look <= 'f' ) || ( look >= 'A' && look <= 'F' ) ) { ungetChar( look ); s = S_number; hexnumber( ); } else { s = S_current_addr; } break; case '\"': string( ); s = S_string; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': number( ); s = S_number; break; case '-': look = getChar( ); if( look >= '0' && look <= '9' ) { ungetChar( look ); s = S_number; look = '-'; number( ); } else { s = S_minus; } break; case '+': look = getChar( ); s = S_plus; break; case '*': look = getChar( ); s = S_star; break; case '/': look = getChar( ); s = S_slash; break; case '=': look = getChar( ); s = S_equals; break; case 'f': identifier( ); if( strcmp( ident, "format" ) == 0 ) { s = S_format; } else { s = S_ident; } break; case 'b': identifier( ); if( strcmp( ident, "binary" ) == 0 ) { s = S_binary; } else { s = S_ident; } break; case 'o': identifier( ); if( strcmp( ident, "org" ) == 0 ) { s = S_org; } else { s = S_ident; } break; case 'u': identifier( ); if( strcmp( ident, "use32" ) == 0 ) { s = S_use32; } else { s = S_ident; } break; case ':': look = getChar( ); s = S_colon; break; case ';': look = getChar( ); skipEolComment( ); return getSym( ); case ',': look = getChar( ); s = S_comma; break; case '[': look = getChar( ); s = S_lbrak; break; case ']': look = getChar( ); s = S_rbrak; break; case '_': case 'a': case 'c': case 'd': case 'e': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'p': case 'q': case 'r': case 's': case 't': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 
'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': identifier( ); s = S_ident; break; case EOF: s = S_eof; break; default: Abort( "Illegal character '%c'", (char)look ); } if( DEBUG_SCANNER ) { switch( s ) { case S_ident: fprintf( stderr, "sym -> %s( '%s' )\n", symname[s], ident ); break; case S_number: fprintf( stderr, "sym -> %s( '%d' )\n", symname[s], num ); break; case S_string: fprintf( stderr, "sym -> %s( '%s' )\n", symname[s], str ); break; default: fprintf( stderr, "sym -> %s\n", symname[s] ); } } return s; } static S_Symbol peekSym( void ) { S_Symbol s, save; char save_ident[MAX_IDENT_LEN+1]; int save_num; save = sym; if( sym == S_ident ) { strlcpy( save_ident, ident, MAX_IDENT_LEN ); } else if( sym == S_number ) { save_num = num; } s = getSym( ); peek_sym = s; if( s == S_ident ) { strlcpy( peek_ident, ident, MAX_IDENT_LEN ); } else if( s == S_number ) { peek_num = num; } sym = save; if( sym == S_ident ) { strlcpy( ident, save_ident, MAX_IDENT_LEN ); } else if( sym == S_number ) { num = save_num; } return s; } /* codegen */ static void Emit( char *s, ... 
) { va_list args; va_start( args, s ); vprintf( s, args ); va_end( args ); fflush( stdout ); } static void Emit_char( int c ) { fputc( c, stdout ); } static void Emit_byte( int d ) { Emit_char( ( d & 0xFF ) ); } static void Emit_word_little_endian( int d ) { Emit_char( ( d & 0xFF ) ); Emit_char( ( d >> 8 ) & 0xFF ); } static void Emit_double_little_endian( int d ) { Emit_char( ( d & 0xFF ) ); Emit_char( ( d >> 8 ) & 0xFF ); Emit_char( ( d >> 16 ) & 0xFF ); Emit_char( ( d >> 24 ) & 0xFF ); } /* parser */ typedef struct Symbol { char *name; int addr; int defined; struct Symbol *next; struct Symbol *next_label; } Symbol; struct Symbol *symbol = NULL; enum { ADDRESS_UNDEFINED = 0x7FFFFFFF, ADDRESS_CURRENT_ORG = 0x7FFFFFFE, ADDRESS_CURRENT_ADDR = 0x7FFFFFFD }; static Symbol *get_symbol( char *name ) { Symbol *sym = symbol; while( sym != NULL ) { if( strcmp( sym->name, name ) == 0 ) { return sym; } sym = sym->next; } return NULL; } static Symbol *insert_symbol( char *name ) { Symbol *sym; sym = get_symbol( name ); if( sym != NULL ) { Abort( "'%s' is already defined.", name ); } sym = Allocate( sizeof( Symbol ) ); sym->name = Allocate( strlen( name ) + 1 ); strlcpy( sym->name, name, strlen( name ) + 1 ); sym->addr = ADDRESS_UNDEFINED; sym->defined = 0; sym->next = symbol; sym->next_label = NULL; symbol = sym; return sym; } static void free_symbol( Symbol *sym ) { free( sym->name ); free( sym ); } static void Expect( S_Symbol expect ) { if( sym == expect ) { sym = getSym( ); } else { Abort( "Expected symbol '%s'", symname[expect] ); } } static Symbol *parseLabel( void ) { Symbol *symbol; symbol = get_symbol( ident ); if( symbol != NULL ) { if( symbol->defined ) { Abort( "label '%s' has already been defined", ident ); } else { symbol->defined = 1; } } else { symbol = insert_symbol( ident ); } sym = getSym( ); Expect( S_colon ); return symbol; } typedef enum { OPCODE_PSEUDO_ORG, OPCODE_PSEUDO_DD, OPCODE_PSEUDO_DW, OPCODE_PSEUDO_DB, OPCODE_PSEUDO_ASSIGN, OPCODE_MOV, 
OPCODE_PUSH, OPCODE_POP, OPCODE_ADD, OPCODE_SUB, OPCODE_MUL, OPCODE_DIV, OPCODE_CMP, OPCODE_JMP, OPCODE_JE, OPCODE_JNE, OPCODE_JB, OPCODE_JBE, OPCODE_JA, OPCODE_JAE, OPCODE_INT, OPCODE_HLT, OPCODE_NOP, OPCODE_CALL, OPCODE_RET, OPCODE_UNKNOWN } Opcode; static char *opcodename[OPCODE_UNKNOWN+1] = { "org", "dd", "dw", "db", "=", "mov", "push", "pop", "add", "sub", "mul", "div", "cmp", "jmp", "je", "jne", "jb", "jbe", "ja", "jae", "int", "hlt", "nop", "call", "ret", "" }; typedef enum { OPERAND_ABSOLUTE = 1, OPERAND_REGISTER = 2, OPERAND_MEMORY_DIRECT = 4, OPERAND_MEMORY_INDIRECT = 8, OPERAND_REGISTER_INDIRECT = 16 } OperandType; typedef enum { REGISTER_EAX = 0, REGISTER_ECX = 1, REGISTER_EDX = 2, REGISTER_EBX = 3, REGISTER_ESP = 4, REGISTER_EBP = 5, REGISTER_AL = 6, REGISTER_UNKNOWN = 99 } Register; static char *registername[7] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "al" }; typedef struct OperandInfo { OperandType type; int num; char *str; Register reg; /* int addr; */ /* Symbol *symbol; */ struct OperandInfo *next; struct ExpressionNode *node; } OperandInfo; typedef struct OpcodeInfo { Symbol *label; int addr; Opcode opcode; int min_operands; int max_operands; OperandInfo *operand; struct OpcodeInfo *next; int size; } OpcodeInfo; static OpcodeInfo *opcodes_head = NULL; static OpcodeInfo *opcodes_tail = NULL; static OpcodeInfo *parseOpcode( void ) { OpcodeInfo *opcode_info = Allocate( sizeof( OpcodeInfo ) ); opcode_info->label = NULL; opcode_info->addr = ADDRESS_UNDEFINED; opcode_info->opcode = OPCODE_UNKNOWN; opcode_info->operand = NULL; opcode_info->next = NULL; opcode_info->size = 0; if( sym == S_ident ) { switch( ident[0] ) { case 'a': if( strcmp( ident, "add" ) == 0 ) { opcode_info->opcode = OPCODE_ADD; opcode_info->min_operands = 2; opcode_info->max_operands = 2; opcode_info->size = 2; } break; case 'c': if( strcmp( ident, "cmp" ) == 0 ) { opcode_info->opcode = OPCODE_CMP; opcode_info->min_operands = 2; opcode_info->max_operands = 2; opcode_info->size = 
2; } else if( strcmp( ident, "call" ) == 0 ) { opcode_info->opcode = OPCODE_CALL; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 5; } break; case 'd': if( strcmp( ident, "dd" ) == 0 ) { opcode_info->opcode = OPCODE_PSEUDO_DD; opcode_info->min_operands = 1; opcode_info->max_operands = MAX_NOF_OPERANDS; opcode_info->size = 4; } else if( strcmp( ident, "db" ) == 0 ) { opcode_info->opcode = OPCODE_PSEUDO_DB; opcode_info->min_operands = 1; opcode_info->max_operands = MAX_NOF_OPERANDS; opcode_info->size = 1; } else if( strcmp( ident, "dw" ) == 0 ) { opcode_info->opcode = OPCODE_PSEUDO_DW; opcode_info->min_operands = 1; opcode_info->max_operands = MAX_NOF_OPERANDS; opcode_info->size = 2; } else if( strcmp( ident, "div" ) == 0 ) { opcode_info->opcode = OPCODE_DIV; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'h': if( strcmp( ident, "hlt" ) == 0 ) { opcode_info->opcode = OPCODE_HLT; opcode_info->min_operands = 0; opcode_info->max_operands = 0; opcode_info->size = 1; } break; case 'i': if( strcmp( ident, "int" ) == 0 ) { opcode_info->opcode = OPCODE_INT; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'j': switch( ident[1] ) { case 'm': if( strcmp( ident, "jmp" ) == 0 ) { opcode_info->opcode = OPCODE_JMP; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'e': if( strcmp( ident, "je" ) == 0 ) { opcode_info->opcode = OPCODE_JE; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'n': if( strcmp( ident, "jne" ) == 0 ) { opcode_info->opcode = OPCODE_JNE; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'b': if( strcmp( ident, "jb" ) == 0 ) { opcode_info->opcode = OPCODE_JB; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } else if( strcmp( ident, "jbe" ) == 0 ) 
{ opcode_info->opcode = OPCODE_JBE; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'a': if( strcmp( ident, "ja" ) == 0 ) { opcode_info->opcode = OPCODE_JA; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } else if( strcmp( ident, "jae" ) == 0 ) { opcode_info->opcode = OPCODE_JAE; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; } break; case 'm': if( strcmp( ident, "mov" ) == 0 ) { opcode_info->opcode = OPCODE_MOV; opcode_info->min_operands = 2; opcode_info->max_operands = 2; opcode_info->size = 5; } else if( strcmp( ident, "mul" ) == 0 ) { opcode_info->opcode = OPCODE_MUL; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 2; } break; case 'n': if( strcmp( ident, "nop" ) == 0 ) { opcode_info->opcode = OPCODE_NOP; opcode_info->min_operands = 0; opcode_info->max_operands = 0; opcode_info->size = 1; } break; case 'p': if( strcmp( ident, "push" ) == 0 ) { opcode_info->opcode = OPCODE_PUSH; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 1; } else if( strcmp( ident, "pop" ) == 0 ) { opcode_info->opcode = OPCODE_POP; opcode_info->min_operands = 1; opcode_info->max_operands = 1; opcode_info->size = 1; } break; case 'r': if( strcmp( ident, "ret" ) == 0 ) { opcode_info->opcode = OPCODE_RET; opcode_info->min_operands = 0; opcode_info->max_operands = 1; opcode_info->size = 1; } break; case 's': if( strcmp( ident, "sub" ) == 0 ) { opcode_info->opcode = OPCODE_SUB; opcode_info->min_operands = 2; opcode_info->max_operands = 2; opcode_info->size = 2; } break; } } if( opcode_info->opcode == OPCODE_UNKNOWN ) { Abort( "Unknown opcode '%s'", ident ); } sym = getSym( ); return opcode_info; } static void free_expression_node( struct ExpressionNode *node ); static void free_operand_info( OperandInfo *operand_info ) { if( operand_info->str != NULL ) { free( operand_info->str ); } if( 
operand_info->node != NULL ) { free_expression_node( operand_info->node ); } free( operand_info ); } static void free_opcode_info( OpcodeInfo *opcode_info ) { OperandInfo *ptr, *tmp; ptr = opcode_info->operand; while( ptr != NULL ) { tmp = ptr->next; free_operand_info( ptr ); ptr = tmp; } free( opcode_info ); } static int isRegister( char *s ) { if( ( strcmp( s, "eax" ) == 0 ) || ( strcmp( s, "ecx" ) == 0 ) || ( strcmp( s, "edx" ) == 0 ) || ( strcmp( s, "ebx" ) == 0 ) || ( strcmp( s, "al" ) == 0 ) || ( strcmp( s, "ebp" ) == 0 ) || ( strcmp( s, "esp" ) == 0 ) ) { return 1; } return 0; } static Register getRegister( char *s ) { if( strcmp( s, "eax" ) == 0 ) return REGISTER_EAX; if( strcmp( s, "ecx" ) == 0 ) return REGISTER_ECX; if( strcmp( s, "edx" ) == 0 ) return REGISTER_EDX; if( strcmp( s, "ebx" ) == 0 ) return REGISTER_EBX; if( strcmp( s, "al" ) == 0 ) return REGISTER_AL; if( strcmp( s, "ebp" ) == 0 ) return REGISTER_EBP; if( strcmp( s, "esp" ) == 0 ) return REGISTER_ESP; return REGISTER_UNKNOWN; } static int is32bitRegister( Register r ) { switch( r ) { case REGISTER_EAX: case REGISTER_ECX: case REGISTER_EDX: case REGISTER_EBX: case REGISTER_EBP: case REGISTER_ESP: return 1; case REGISTER_AL: default: return 0; } } typedef enum ExpressionNodeType { EXPRESSION_NODE_TYPE_CONST, EXPRESSION_NODE_TYPE_VAR, EXPRESSION_NODE_TYPE_OP } ExpressionNodeType; typedef struct ExpressionNode { ExpressionNodeType type; S_Symbol op; struct ExpressionNode *left, *right; int integer_value; Symbol *symbol; } ExpressionNode; static ExpressionNode *create_expression_node( void ) { ExpressionNode *node = Allocate( sizeof( ExpressionNode ) ); node->left = NULL; node->right = NULL; return node; } static void free_expression_node( ExpressionNode *node ) { if( node->left != NULL ) { free_expression_node( node->left ); } if( node->right != NULL ) { free_expression_node( node->right ); } free( node ); } static ExpressionNode *parseFactor( void ) { Symbol *symbol; ExpressionNode *node = NULL; 
if( sym == S_number ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = num; sym = getSym( ); } else if( sym == S_ident ) { symbol = get_symbol( ident ); if( symbol == NULL ) { symbol = insert_symbol( ident ); } node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_VAR; node->symbol = symbol; sym = getSym( ); } else if( sym == S_current_org ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = ADDRESS_CURRENT_ORG; sym = getSym( ); } else if( sym == S_current_addr ) { node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_CONST; node->integer_value = ADDRESS_CURRENT_ADDR; sym = getSym( ); } else { Abort( "Expected a literal, a variable or a constant." ); } return node; } static ExpressionNode *parseTerm( void ) { ExpressionNode *node, *tmp; node = parseFactor( ); while( sym == S_star || sym == S_slash ) { tmp = node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = tmp; node->right = parseFactor( ); } return node; } static ExpressionNode *parseExpression( void ) { ExpressionNode *node, *tmp; node = parseTerm( ); while( sym == S_plus || sym == S_minus ) { tmp = node; node = create_expression_node( ); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; sym = getSym( ); node->left = tmp; node->right = parseTerm( ); } return node; } static int evaluateExpression( ExpressionNode *node, int ORG, int LC ) { int left; int right; int num; switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: if( node->integer_value == ADDRESS_CURRENT_ORG ) { return ORG; } else if( node->integer_value == ADDRESS_CURRENT_ADDR ) { return LC; } else { return node->integer_value; } case EXPRESSION_NODE_TYPE_VAR: return node->symbol->addr; case EXPRESSION_NODE_TYPE_OP: left = evaluateExpression( node->left, ORG, LC ); right = evaluateExpression( node->right, ORG, LC ); switch( node->op ) { case S_plus: 
num = left + right; break; case S_minus: num = left - right; break; default: num = 0; /* make clang happy */ Abort( "Unknown operation '%s' in expression", symname[node->op] ); } return num; } return 0; } static OperandInfo *parseOperand( OpcodeInfo *opcode_info ) { OperandInfo *operand_info; operand_info = Allocate( sizeof( OperandInfo ) ); operand_info->str = NULL; operand_info->next = NULL; operand_info->num = ADDRESS_UNDEFINED; operand_info->node = NULL; if( sym == S_ident ) { /* op like eax or an address jmp x */ if( isRegister( ident ) ) { operand_info->type = OPERAND_REGISTER; operand_info->reg = getRegister( ident ); sym = getSym( ); } else { operand_info->type = OPERAND_MEMORY_DIRECT; operand_info->node = parseExpression( ); } } else if( sym == S_number || sym == S_current_org || sym == S_current_addr ) { /* absolute operand, like in mov eax, $1, * we have assemble-time expressions here */ operand_info->type = OPERAND_ABSOLUTE; operand_info->node = parseExpression( ); } else if( sym == S_string ) { operand_info->type = OPERAND_ABSOLUTE; operand_info->str = AllocateAndCopyStr( str ); opcode_info->size = strlen( str ); sym = getSym( ); } else if( sym == S_lbrak ) { /* pointer indirection as [a] - memory indirect and * register indirect as [ebx] */ Expect( S_lbrak ); if( isRegister( ident ) ) { operand_info->type = OPERAND_REGISTER_INDIRECT; operand_info->reg = getRegister( ident ); sym = getSym( ); } else { operand_info->type = OPERAND_MEMORY_INDIRECT; operand_info->node = parseExpression( ); } Expect( S_rbrak ); } else if( sym == S_newline ) { /* ok, end of operands */ } else { Abort( "Parse error when parsing operands at '%s'", symname[sym] ); } return operand_info; } static void append_operand( OpcodeInfo *opcode_info, OperandInfo *operand_info ) { OperandInfo *ptr; if( opcode_info->operand == NULL ) { opcode_info->operand = operand_info; } else { ptr = opcode_info->operand; while( ptr->next != NULL ) { ptr = ptr->next; } ptr->next = operand_info; } } 
static void parseOperands( OpcodeInfo *opcode_info ) { int nof_operands = 0; OperandInfo *operand_info; while( sym != S_newline ) { operand_info = parseOperand( opcode_info ); append_operand( opcode_info, operand_info ); nof_operands++; while( sym == S_comma ) { sym = getSym( ); operand_info = parseOperand( opcode_info ); append_operand( opcode_info, operand_info ); nof_operands++; } } if( nof_operands < opcode_info->min_operands ) { Abort( "'%s' expects at least %d operand(s), only %d given", opcodename[opcode_info->opcode], opcode_info->min_operands, nof_operands ); } if( nof_operands > opcode_info->max_operands ) { Abort( "'%s' expects at most %d operand(s), but %d given", opcodename[opcode_info->opcode], opcode_info->max_operands, nof_operands ); } switch( opcode_info->opcode ) { case OPCODE_HLT: case OPCODE_NOP: break; case OPCODE_PSEUDO_DD: case OPCODE_PSEUDO_DW: { OperandInfo *operand_info; int i = 0; int size = 0; operand_info = opcode_info->operand; while( operand_info != NULL ) { switch( opcode_info->opcode ) { case OPCODE_PSEUDO_DW: size += 2; break; case OPCODE_PSEUDO_DD: size += 4; default: break; } i++; operand_info = operand_info->next; } opcode_info->max_operands = i; opcode_info->size = size; } break; case OPCODE_PSEUDO_DB: { OperandInfo *operand_info; int i = 0; int size = 0; operand_info = opcode_info->operand; while( operand_info != NULL ) { if( operand_info->str != NULL ) { size += strlen( operand_info->str ); } else { size += 1; } i++; operand_info = operand_info->next; } opcode_info->max_operands = i; opcode_info->size = size; } break; case OPCODE_MOV: if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_ABSOLUTE ) { /* mov eax, $22, load absolute value */ } else if( opcode_info->operand->type == OPERAND_MEMORY_INDIRECT && opcode_info->operand->next->type == OPERAND_REGISTER && ( opcode_info->operand->next->reg == REGISTER_EAX || opcode_info->operand->next->reg == REGISTER_AL ) ) { /* mov [mem], 
eax, storing to memory */ } else if( opcode_info->operand->type == OPERAND_REGISTER && ( opcode_info->operand->reg == REGISTER_EAX || opcode_info->operand->reg == REGISTER_AL ) && opcode_info->operand->next->type == OPERAND_MEMORY_INDIRECT ) { /* mov eax, [mem], reading variables or parts of it */ } else if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->reg == REGISTER_EAX && opcode_info->operand->next->type == OPERAND_MEMORY_DIRECT ) { /* mov eax, mem, for address calculations (arrays, records) */ } else if( opcode_info->operand->type == OPERAND_REGISTER_INDIRECT && opcode_info->operand->reg == REGISTER_EBX && opcode_info->operand->next->type == OPERAND_REGISTER && ( opcode_info->operand->next->reg == REGISTER_EAX || opcode_info->operand->next->reg == REGISTER_AL ) ) { /* mov [ebx], eax, indirect addressing array/record elements, write */ opcode_info->size = 2; } else if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_REGISTER_INDIRECT && ( opcode_info->operand->reg == REGISTER_EAX || opcode_info->operand->reg == REGISTER_AL ) ) { /* mov eax, [ebx], indirect addressing array/record elements, read */ opcode_info->size = 2; } else { Abort( "Unsupported operand combination in 'mov'" ); } break; case OPCODE_PUSH: case OPCODE_POP: case OPCODE_MUL: case OPCODE_DIV: if( opcode_info->operand->type != OPERAND_REGISTER ) { Abort( "'%s' expects a register as argument", opcodename[opcode_info->opcode] ); } break; case OPCODE_ADD: case OPCODE_SUB: case OPCODE_CMP: if( opcode_info->operand->type != OPERAND_REGISTER || opcode_info->operand->next->type != OPERAND_REGISTER ) { Abort( "Unsupported addressing mode in '%s'", opcodename[opcode_info->opcode] ); } break; case OPCODE_JMP: case OPCODE_JE: case OPCODE_JNE: case OPCODE_JB: case OPCODE_JBE: case OPCODE_JA: case OPCODE_JAE: case OPCODE_CALL: if( opcode_info->operand->type != OPERAND_MEMORY_DIRECT ) { Abort( "'%s' expects a jump label", 
opcodename[opcode_info->opcode] ); } break; case OPCODE_INT: if( opcode_info->operand->type == OPERAND_ABSOLUTE ) { /* int $80, jump to interrupt vector number */ } case OPCODE_RET: if( opcode_info->operand == NULL ) { /* ok, no parameter return */ opcode_info->size = 1; } else if( opcode_info->operand->type == OPERAND_ABSOLUTE ) { /* return, pop N bytes from stack */ opcode_info->size = 3; } else { Abort( "'%s' expects either no operand or a number of bytes to remove from the stack", opcodename[opcode_info->opcode] ); } break; default: Abort( "Unhandled opcode '%s' when checking operand validity", opcodename[opcode_info->opcode] ); } } static OpcodeInfo *parseOperation( Symbol *label ) { OpcodeInfo *opcode_info; opcode_info = parseOpcode( ); opcode_info->label = label; parseOperands( opcode_info ); return opcode_info; } static OpcodeInfo *parseOrg( void ) { OpcodeInfo *opcode_info; OperandInfo *operand_info; Expect( S_org ); number( ); opcode_info = Allocate( sizeof( OpcodeInfo ) ); opcode_info->addr = 0; opcode_info->opcode = OPCODE_PSEUDO_ORG; opcode_info->operand = NULL; opcode_info->next = NULL; opcode_info->size = 0; operand_info = Allocate( sizeof( OperandInfo ) ); operand_info->next = NULL; operand_info->type = OPERAND_ABSOLUTE; operand_info->num = num; operand_info->str = NULL; operand_info->node = NULL; append_operand( opcode_info, operand_info ); sym = getSym( ); return opcode_info; } static OpcodeInfo *parseAssignment( void ) { OpcodeInfo *opcode_info; OperandInfo *operand_info; Symbol *symbol; ExpressionNode *node; opcode_info = Allocate( sizeof( OpcodeInfo ) ); opcode_info->addr = 0; opcode_info->opcode = OPCODE_PSEUDO_ASSIGN; opcode_info->operand = NULL; opcode_info->next = NULL; opcode_info->size = 0; symbol = get_symbol( ident ); if( symbol != NULL && symbol->defined ) { Abort( "Constant '%s' has already been defined", ident ); } else { if( symbol == NULL ) { symbol = insert_symbol( ident ); } } symbol->defined = 1; node = create_expression_node( 
); node->type = EXPRESSION_NODE_TYPE_OP; node->op = sym; node->left = create_expression_node( ); node->left->type = EXPRESSION_NODE_TYPE_VAR; node->left->symbol = symbol; sym = getSym( ); Expect( S_equals ); node->right = parseExpression( ); operand_info = Allocate( sizeof( OperandInfo ) ); operand_info->next = NULL; operand_info->type = OPERAND_ABSOLUTE; operand_info->num = 0; operand_info->str = NULL; operand_info->node = node; append_operand( opcode_info, operand_info ); return opcode_info; } Symbol *last_label = NULL; static OpcodeInfo *parseDirective( void ) { OpcodeInfo *opcode_info = NULL; Symbol *symbol; S_Symbol peek; switch( sym ) { case S_org: opcode_info = parseOrg( ); break; case S_ident: peek = peekSym( ); if( peek == S_colon ) { symbol = parseLabel( ); if( last_label != NULL ) { symbol->next_label = last_label; } last_label = symbol; if( sym == S_newline ) { return NULL; } opcode_info = parseOperation( last_label ); last_label = NULL; } else if( peek == S_equals ) { opcode_info = parseAssignment( ); } else { opcode_info = parseOperation( last_label ); last_label = NULL; } break; case S_newline: break; default: Abort( "Parse error at '%s'\n", symname[sym] ); } return opcode_info; } static void parseHeader( void ) { Expect( S_format ); Expect( S_binary ); /* the only format we support for now */ Expect( S_newline ); Expect( S_use32 ); /* for now the only width we support */ Expect( S_newline ); } static void init( void ) { col = 1; row = 1; look = getChar( ); sym = getSym( ); } static void prologue( void ) { /* format binary: raw hex, entry point is fix at ORG, * data follows code */ } static void epilogue( void ) { /* do final things here */ } static void free_symbols( Symbol *sym ) { Symbol *ptr; while( sym != NULL ) { ptr = sym->next; free_symbol( sym ); sym = ptr; } } static void free_opcodes( OpcodeInfo *opcode_info ) { OpcodeInfo *info; while( opcode_info != NULL ) { info = opcode_info->next; free_opcode_info( opcode_info ); opcode_info = info; } 
} static void deinit( void ) { free_symbols( symbol ); free_opcodes( opcodes_head ); } static int relative_distance( OpcodeInfo *opcode_info, int src_addr, int dest_addr ) { int rel = dest_addr - src_addr - opcode_info->size; return rel; } static int patchup_addresses( OpcodeInfo *opcode_info, int ORG ) { OpcodeInfo *opcode = opcode_info; OperandInfo *operand; int res = 0; while( opcode != NULL ) { /* replace illegal address (forward references) with the * correct address */ operand = opcode->operand; while( operand != NULL ) { /* if( operand->type == OPERAND_MEMORY_DIRECT || operand->type == OPERAND_MEMORY_INDIRECT ) { */ if( operand->num == ADDRESS_UNDEFINED ) { if( operand->node != NULL ) { operand->num = evaluateExpression( operand->node, ORG, opcode->addr ); } } /* } */ operand = operand->next; } /* set new increment of an instruction, if we have to * enhance the space occupied by the opcode */ switch( opcode->opcode ) { case OPCODE_JMP: if( DEBUG_PARSER ) { fprintf( stderr, "JMP at $%X to $%X\n", opcode->addr, opcode->operand->num ); } if( opcode->addr != ADDRESS_UNDEFINED && opcode->operand->num != ADDRESS_UNDEFINED ) { int rel = relative_distance( opcode, opcode->addr, opcode->operand->num ); if( DEBUG_PARSER ) { fprintf( stderr, "JMP at $%X to $%X, rel %d\n", opcode->addr, opcode->operand->num, rel ); } if( opcode->size == 2 ) { if( rel >= -128 && rel <= 127 ) { /* all fine, optimistic case already taken, short rel8 jump */ } else { if( DEBUG_PARSER ) { fprintf( stderr, "Increasing opcode of jump to 5 at $%X\n", opcode->addr ); } opcode->size = 5; res = 1; } } else if( opcode->size == 5 ) { /* rel32 set, ok */ } else { Abort( "Wrong opcode size '%d' for jmp with address '%X'", opcode->size, opcode->addr ); } } break; case OPCODE_JE: case OPCODE_JNE: case OPCODE_JB: case OPCODE_JBE: case OPCODE_JA: case OPCODE_JAE: if( opcode->addr != ADDRESS_UNDEFINED && opcode->operand->num != ADDRESS_UNDEFINED ) { int rel = relative_distance( opcode, opcode->addr, 
opcode->operand->num ); if( rel >= -128 && rel <= 127 ) { /* all fine, short rel8 jump */ } else { Abort( "Conditional jumps only possible in rel8 distance currently" ); } } break; case OPCODE_CALL: /* all fine, we only have rel32 absolute adresses for now */ break; default: break; } opcode = opcode->next; } return res; } static char *get_expression_comment( ExpressionNode *node, char *buf, int bufsize ) { char s[MAX_STRING_LEN+1]; s[0] = '\0'; switch( node->type ) { case EXPRESSION_NODE_TYPE_CONST: snprintf( s, MAX_STRING_LEN, "$%X", node->integer_value ); strlcat( buf, s, bufsize ); break; case EXPRESSION_NODE_TYPE_VAR: strlcat( buf, node->symbol->name, bufsize ); break; case EXPRESSION_NODE_TYPE_OP: if( node->left != NULL ) { get_expression_comment( node->left, buf, bufsize ); } strlcat( buf, symname[node->op], bufsize ); if( node->right != NULL ) { get_expression_comment( node->right, buf, bufsize ); } break; default: Abort( "Unhandled case in expression tree while outputing comment!" 
); } return buf; } static void print_opcodes( OpcodeInfo *opcode_info ) { OpcodeInfo *opcode = opcode_info; OperandInfo *operand; char indent[3]; int first; char buf[MAX_STRING_LEN+1]; while( opcode != NULL ) { first = 1; strlcpy( indent, " ", 2 ); fprintf( stderr, "%X: %s", opcode->addr, opcodename[opcode->opcode] ); operand = opcode->operand; buf[0] = '\0'; while( operand != NULL ) { switch( operand->type ) { case OPERAND_ABSOLUTE: if( operand->str == NULL ) { fprintf( stderr, "%s$%X", indent, operand->num ); } else { int i; int len = strlen( operand->str ); fprintf( stderr, "%s\"", indent ); for( i = 0; i < len; i++ ) { fprintf( stderr, "%c", operand->str[i] ); } fprintf( stderr, "\"" ); } break; case OPERAND_REGISTER: fprintf( stderr, "%s%s", indent, registername[operand->reg] ); break; case OPERAND_MEMORY_DIRECT: fprintf( stderr, "%s%s=$%X", indent, get_expression_comment( operand->node, buf, MAX_STRING_LEN ), operand->num ); break; case OPERAND_MEMORY_INDIRECT: fprintf( stderr, "%s[%s=$%X]", indent, get_expression_comment( operand->node, buf, MAX_STRING_LEN ), operand->num ); break; case OPERAND_REGISTER_INDIRECT: fprintf( stderr, "%s[%s]", indent, registername[operand->reg] ); break; } if( first ) { first = 0; strlcpy( indent, ", ", 3 ); } operand = operand->next; } fputs( "\n", stderr ); opcode = opcode->next; } } void print_labels( Symbol *symbol ) { Symbol *sym = symbol; if( DEBUG_PARSER ) { while( sym != NULL ) { fprintf( stderr, "%s=$%X\n", sym->name, sym->addr ); sym = sym->next; } } } static void check_for_undefined_labels( Symbol *symbol ) { Symbol *sym = symbol; sym = symbol; while( sym != NULL ) { if( sym->addr == ADDRESS_UNDEFINED ) { Abort( "Undefined address for label '%s'", sym->name ); } sym = sym->next; } } static int compute_addresses( OpcodeInfo *opcode_info ) { OpcodeInfo *opcode = opcode_info; OperandInfo *operand; int ORG = 0; int LC = 0; Symbol *label; while( opcode != NULL ) { switch( opcode->opcode ) { case OPCODE_PSEUDO_ORG: ORG = 
opcode->operand->num; LC = ORG; if( DEBUG_PARSER ) { fprintf( stderr, "ORG/LC set to $%X\n", LC ); } break; case OPCODE_PSEUDO_ASSIGN: opcode->operand->node->left->symbol->addr = evaluateExpression( opcode->operand->node->right, ORG, LC ); break; case OPCODE_PSEUDO_DD: case OPCODE_PSEUDO_DW: case OPCODE_PSEUDO_DB: case OPCODE_MOV: case OPCODE_PUSH: case OPCODE_POP: case OPCODE_ADD: case OPCODE_SUB: case OPCODE_MUL: case OPCODE_DIV: case OPCODE_CMP: case OPCODE_JMP: case OPCODE_JE: case OPCODE_JNE: case OPCODE_JB: case OPCODE_JBE: case OPCODE_JA: case OPCODE_JAE: case OPCODE_INT: case OPCODE_HLT: case OPCODE_NOP: case OPCODE_CALL: case OPCODE_RET: opcode->addr = LC; label = opcode->label; while( label != NULL ) { label->addr = LC; if( DEBUG_PARSER ) { fprintf( stderr, "LC=$%X assigned to label '%s'\n", LC, label->name ); } label = label->next_label; } LC += opcode->size; operand = opcode->operand; while( operand != NULL ) { switch( operand->type ) { case OPERAND_ABSOLUTE: if( operand->node != NULL ) { operand->num = evaluateExpression( operand->node, ORG, LC ); } break; case OPERAND_MEMORY_DIRECT: case OPERAND_MEMORY_INDIRECT: if( operand->node != NULL ) { operand->num = evaluateExpression( operand->node, ORG, LC ); } if( DEBUG_PARSER ) { fprintf( stderr, "LC=$%X assigned to operand at $%X\n", LC, operand->num ); } break; case OPERAND_REGISTER: case OPERAND_REGISTER_INDIRECT: break; default: Abort( "Unhandled case when computing operands from expressions" ); } operand = operand->next; } break; default: Abort( "Unhandled opcode '%s' when computing addresses", opcodename[opcode->opcode] ); } opcode = opcode->next; } return ORG; } static void generate_code( OpcodeInfo *opcode_info ) { int has_to_relocate = 0; int pass = 0; int ORG = 0; do { ORG = compute_addresses( opcode_info ); pass++; if( pass > MAX_PASSES ) { Abort( "Too many passes" ); } if( DEBUG_PARSER ) { fprintf( stderr, "-- pass %d\n", pass ); fputs( "code:\n", stderr ); print_opcodes( opcode_info ); fputs( 
"labels:\n", stderr ); print_labels( symbol ); } has_to_relocate = patchup_addresses( opcode_info, ORG ); } while( has_to_relocate ); compute_addresses( opcode_info ); pass++; check_for_undefined_labels( symbol ); if( DEBUG_PARSER ) { fprintf( stderr, "-- pass %d\n", pass ); fputs( "code:\n", stderr ); print_opcodes( opcode_info ); fputs( "labels:\n", stderr ); print_labels( symbol ); } fprintf( stderr, "Assembled in %d %s.\n", pass, ( pass == 1 ) ? "pass" : "passes" ); } static void emit_opcode( OpcodeInfo *opcode_info ) { OperandInfo *operand; switch( opcode_info->opcode ) { case OPCODE_PSEUDO_ORG: break; case OPCODE_PSEUDO_DD: operand = opcode_info->operand; while( operand != NULL ) { Emit_double_little_endian( opcode_info->operand->num ); operand = operand->next; } break; case OPCODE_PSEUDO_DW: operand = opcode_info->operand; while( operand != NULL ) { Emit_word_little_endian( operand->num ); operand = operand->next; } break; case OPCODE_PSEUDO_DB: operand = opcode_info->operand; while( operand != NULL ) { if( operand->str == NULL ) { Emit_byte( operand->num ); } else { int i; int len = strlen( operand->str ); for( i = 0; i < len; i++ ) { Emit( "%c", operand->str[i] ); } } operand = operand->next; } break; case OPCODE_HLT: Emit( "%c", 0xF4 ); break; case OPCODE_NOP: Emit( "%c", 0x90 ); break; case OPCODE_MOV: if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_ABSOLUTE ) { Emit( "%c", 0xB8 | opcode_info->operand->reg ); Emit_double_little_endian( opcode_info->operand->next->num ); } else if( opcode_info->operand->type == OPERAND_MEMORY_INDIRECT && opcode_info->operand->next->type == OPERAND_REGISTER ) { if( opcode_info->operand->next->reg == REGISTER_EAX ) { Emit( "%c", 0xA3 ); } else if( opcode_info->operand->next->reg == REGISTER_AL ) { Emit( "%c", 0xA2 ); } else { Abort( "mov is only allowed with EAX or AL register" ); } Emit_double_little_endian( opcode_info->operand->num ); } else if( opcode_info->operand->type 
== OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_MEMORY_INDIRECT ) { if( opcode_info->operand->reg == REGISTER_EAX ) { Emit( "%c", 0xA1 ); } else if( opcode_info->operand->reg == REGISTER_AL ) { Emit( "%c", 0xA0 ); } else { Abort( "mov is only allowed with EAX or AL register" ); } Emit_double_little_endian( opcode_info->operand->next->num ); } else if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->reg == REGISTER_EAX && opcode_info->operand->next->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c", 0xB8 ); Emit_double_little_endian( opcode_info->operand->next->num ); } else if( opcode_info->operand->type == OPERAND_REGISTER_INDIRECT && opcode_info->operand->reg == REGISTER_EBX && opcode_info->operand->next->type == OPERAND_REGISTER && ( opcode_info->operand->next->reg == REGISTER_EAX || opcode_info->operand->next->reg == REGISTER_AL ) ) { if( opcode_info->operand->next->reg == REGISTER_EAX ) { Emit( "%c", 0x89 ); } else if( opcode_info->operand->next->reg == REGISTER_AL ) { Emit( "%c", 0x88 ); } Emit( "%c", 0x03 ); } else if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_REGISTER_INDIRECT && opcode_info->operand->next->reg == REGISTER_EBX && ( opcode_info->operand->reg == REGISTER_EAX || opcode_info->operand->reg == REGISTER_AL ) ) { if( opcode_info->operand->reg == REGISTER_EAX ) { Emit( "%c", 0x8B ); } else if( opcode_info->operand->reg == REGISTER_AL ) { Emit( "%c", 0x8A ); } Emit( "%c", 0x03 ); } else { Abort( "Unhandled opcode generation case in 'mov'" ); } break; case OPCODE_PUSH: if( opcode_info->operand->type == OPERAND_REGISTER ) { if( is32bitRegister( opcode_info->operand->reg ) ) { Emit( "%c", 0x50 | opcode_info->operand->reg ); } else { Abort( "Only 32-bit stack operations are possible" ); } } break; case OPCODE_POP: if( opcode_info->operand->type == OPERAND_REGISTER ) { if( is32bitRegister( opcode_info->operand->reg ) ) { Emit( "%c", 0x58 | opcode_info->operand->reg 
); } else { Abort( "Only 32-bit stack operations are possible" ); } } break; case OPCODE_ADD: if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_REGISTER ) { Emit( "%c%c", 0x01, 0xC0 | ( opcode_info->operand->reg ) | ( opcode_info->operand->next->reg << 3 ) ); } break; case OPCODE_SUB: if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_REGISTER ) { Emit( "%c%c", 0x29, 0xC0 | ( opcode_info->operand->reg ) | ( opcode_info->operand->next->reg << 3 ) ); } break; case OPCODE_MUL: if( opcode_info->operand->type == OPERAND_REGISTER ) { Emit( "%c%c", 0xF7, 0xE0 | opcode_info->operand->reg ); } break; case OPCODE_DIV: if( opcode_info->operand->type == OPERAND_REGISTER ) { Emit( "%c%c", 0xF7, 0xF0 | opcode_info->operand->reg ); } break; case OPCODE_CMP: if( opcode_info->operand->type == OPERAND_REGISTER && opcode_info->operand->next->type == OPERAND_REGISTER ) { Emit( "%c%c", 0x39, 0xC0 | ( opcode_info->operand->reg ) | ( opcode_info->operand->next->reg << 3 ) ); } break; case OPCODE_JMP: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { int rel = relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ); if( rel >= -128 && rel <= 127 ) { Emit( "%c", 0xEB ); Emit_char( rel ); } else { Emit( "%c", 0xE9 ); Emit_double_little_endian( rel ); } } break; case OPCODE_JE: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x74, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_JNE: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x75, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_JB: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x72, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_JBE: if( opcode_info->operand->type == 
OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x76, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_JA: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x77, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_JAE: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { Emit( "%c%c", 0x73, relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ) ); } break; case OPCODE_INT: if( opcode_info->operand->type == OPERAND_ABSOLUTE ) { Emit( "%c%c", 0xCD, opcode_info->operand->num ); } break; case OPCODE_CALL: if( opcode_info->operand->type == OPERAND_MEMORY_DIRECT ) { int rel = relative_distance( opcode_info, opcode_info->addr, opcode_info->operand->num ); Emit( "%c", 0xE8 ); Emit_double_little_endian( rel ); } break; case OPCODE_RET: if( opcode_info->operand == NULL ) { Emit( "%c", 0xC3 ); } else if( opcode_info->operand->type == OPERAND_ABSOLUTE ) { Emit( "%c", 0xC2 ); Emit_word_little_endian( opcode_info->operand->num ); } break; case OPCODE_PSEUDO_ASSIGN: break; default: Abort( "Opcode '%s' has not been implemented when generating binary", opcodename[opcode_info->opcode] ); } } static void emit_code( OpcodeInfo *opcode_info ) { OpcodeInfo *opcode = opcode_info; prologue( ); while( opcode != NULL ) { emit_opcode( opcode ); opcode = opcode->next; } epilogue( ); } int main( void ) { OpcodeInfo *opcode_info = NULL; init( ); parseHeader( ); while( sym != S_eof ) { opcode_info = parseDirective( ); if( opcode_info != NULL ) { if( opcodes_head == NULL ) { opcodes_head = opcode_info; } if( opcodes_tail == NULL ) { opcodes_tail = opcode_info; } else { opcodes_tail->next = opcode_info; opcodes_tail = opcode_info; } } Expect( S_newline ); } if( sym != S_eof ) { Abort( "Unexpected EOF" ); } generate_code( opcodes_head ); emit_code( opcodes_head ); deinit( ); malloc_stats( ); Halt( EXIT_SUCCESS ); return 0; }