diff options
Diffstat (limited to 'miniany/doc/blog.jeff.over.bz_assembly_compilers_jit_2017_01_15_x86-assembler.txt')
-rw-r--r-- | miniany/doc/blog.jeff.over.bz_assembly_compilers_jit_2017_01_15_x86-assembler.txt | 679 |
1 files changed, 679 insertions, 0 deletions
diff --git a/miniany/doc/blog.jeff.over.bz_assembly_compilers_jit_2017_01_15_x86-assembler.txt b/miniany/doc/blog.jeff.over.bz_assembly_compilers_jit_2017_01_15_x86-assembler.txt new file mode 100644 index 0000000..f712d28 --- /dev/null +++ b/miniany/doc/blog.jeff.over.bz_assembly_compilers_jit_2017_01_15_x86-assembler.txt @@ -0,0 +1,679 @@ + #[1]256LOL RSS Feed + +[2][headshot-circle.png] + + 256 LINES OR LESS + (jeff overbey's blog) + * [3]22 Apr 2018 Building a Go Doctor Refactoring + * [4]09 Sep 2017 Lexical Analysis + * [5]01 Jun 2017 On Performance Improvements + * [6]30 Mar 2017 Executing Dynamically Generated Machine Code: The + Start of a JIT + * [7]15 Feb 2017 Finding Machine Language Encodings + * [8]15 Jan 2017 An x86 Assembler in 256 LOC + * [9]15 Dec 2016 256 Lines or Less (the joverblog?) + + [10]RSS Feed o [11]My Web Site + [sidebar-button.png] + [empty.jpg] + +An x86 Assembler in 256 LOC + + 15 Jan 2017 + + For the first "real" post in this blog, we'll build an x86 assembler in + less than 256 lines of C code. Obviously, it won't implement every x86 + instruction, but it will implement a surprisingly useful subset: data + movement, control flow, integer arithmetic, bitwise operations, and + function calls. We won't be able to run the generated machine code yet + (that's coming in a later blog post), but we'll be in a good position + to do so. + + I'll assume you're already familiar with x86 assembly language + (hopefully the table below will serve as a brief refresher), although I + won't assume you know about their machine language encodings. I'll also + assume that you're familiar with hexadecimal representation and + arithmetic (e.g., 9 + 1 = A and 10 - 1 = F). + +1. Which instructions will we support? + + By the time we finish, we'll have an assembler that supports all of the + following x86 instructions (yes, I'm serious): + Instruction Example Description of the Example + nop nop No operation (do nothing) + -- Data Movement -- + mov register, immediate mov eax, 0F00Dh Place the value F00D + (hexadecimal) in EAX + mov register, register mov eax, ebx Copy the value from the EBX + register into EAX + mov register, [register] mov eax, [ebx] Treat EBX as pointer; load + 32-bit value from memory into EAX + mov [register], register mov [eax], ebx Treat EAX as pointer; store + 32-bit value from EBX in memory + -- Arithmetic -- + add register, register add eax, ebx EAX = EAX + EBX + cdq cdq Sign-extend EAX into EDX in preparation for idiv + dec register dec eax EAX = EAX - 1 + div register div ebx Unsigned division: EDX:EAX ÷ EBX, + setting EAX = quotient, EDX = remainder + idiv register idiv ebx Signed division: EDX:EAX ÷ EBX, + setting EAX = quotient, EDX = remainder + imul register imul ebx Signed multiplication: EDX:EAX = EAX × EBX + inc register inc eax EAX = EAX + 1 + neg register neg eax EAX = -EAX + mul register mul ebx Unsigned multiplication: EDX:EAX = EAX × EBX + sub register, register sub eax, ebx EAX = EAX - EBX + -- Bitwise Operations -- + and register, register and eax, ebx EAX = EAX & EBX + not register not eax EAX = ~EAX + or register, register or eax, ebx EAX = EAX | EBX + sar register, immediate sar eax, 2 Shift EAX right by 2 bits + (sign-fill) + sar register, cl sar eax, cl Shift EAX right by CL bits (sign-fill) + shl register, immediate shl eax, 2 Shift EAX left by 2 bits + shl register, cl shl eax, cl Shift EAX left by number of bits in CL + shr register, immediate shr eax, 2 Shift EAX right by 2 bits + (zero-fill) + shr register, cl shr eax, cl Shift EAX right by CL bits (zero-fill) + xor register, register xor eax, ebx EAX = EAX ^ EBX + -- Comparison -- + cmp register, register cmp eax, ebx Compare EAX to EBX, setting + flags for conditional jump + -- Control Flow -- + jmp bytes jmp -10 Jump -10 bytes, i.e., move EIP backward by 10 + bytes + ja bytes ja -10 Jump if above (>, unsigned) + jae bytes jae -10 Jump if above or equal (>=, unsigned) + jb bytes jb -10 Jump if below (<, unsigned) + jbe bytes jbe -10 Jump if below or equal (<=, unsigned) + je bytes je -10 Jump if equal + jg bytes jg -10 Jump if greater (>, signed) + jge bytes jge -10 Jump if greater or equal (>=, signed) + jl bytes jl -10 Jump if less (<, signed) + jle bytes jle -10 Jump if less or equal (<=, signed) + jne bytes jne -10 Jump if not equal + -- Function Calls -- + call register call eax Call function at pointer stored in EAX + push register push eax Push value of EAX onto the stack + pop register pop eax Pop a value from the stack into EAX + ret immediate ret 4 Return from function, removing 4 bytes of stack + arguments + +2. The API: x86asm.h + + The header file, x86asm.h, defines the API that we intend for clients + to use. It provides + * an enumeration of the x86's 32-bit registers (reg32_t), and + * one function for each instruction form we can assemble. + + Here's the header in its entirety. (There's more explanation in the + next section, but it will be helpful to read through the header file + first.) +// x86 Subset Assembler - API +//----------------------------------------------------------------------------- +// Copyright (C) 2017 Jeffrey L. Overbey. Use of this source code is governed +// by a BSD-style license posted at http://blog.jeff.over.bz/license/ + +#ifndef X86ASM_H +#define X86ASM_H + +#include <stdint.h> // uint8_t, unint32_t + +typedef enum { EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI } reg32_t; + +uint8_t *nop(uint8_t *buf); + +uint8_t *mov_immediate(reg32_t dest, int32_t value, uint8_t *buf); +uint8_t * mov_from_ptr(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * mov_to_ptr(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * mov(reg32_t dest, reg32_t src, uint8_t *buf); + +uint8_t * add(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * sub(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * and(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * or(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * xor(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * cmp(reg32_t dest, reg32_t src, uint8_t *buf); +uint8_t * inc(reg32_t reg, uint8_t *buf); +uint8_t * dec(reg32_t reg, uint8_t *buf); +uint8_t * not(reg32_t reg, uint8_t *buf); +uint8_t * neg(reg32_t reg, uint8_t *buf); +uint8_t * mul(reg32_t reg, uint8_t *buf); +uint8_t * imul(reg32_t reg, uint8_t *buf); +uint8_t * div_(reg32_t reg, uint8_t *buf); +uint8_t * idiv(reg32_t reg, uint8_t *buf); +uint8_t * cdq(uint8_t *buf); + +uint8_t * shl(reg32_t reg, uint8_t bits, uint8_t *buf); +uint8_t *shl_cl(reg32_t reg, uint8_t *buf); +uint8_t * shr(reg32_t reg, uint8_t bits, uint8_t *buf); +uint8_t *shr_cl(reg32_t reg, uint8_t *buf); +uint8_t * sar(reg32_t reg, uint8_t bits, uint8_t *buf); +uint8_t *sar_cl(reg32_t reg, uint8_t *buf); + +uint8_t *push(reg32_t reg, uint8_t *buf); +uint8_t * pop(reg32_t reg, uint8_t *buf); +uint8_t *call(reg32_t reg, uint8_t *buf); +uint8_t * ret(uint16_t bytes, uint8_t *buf); + +uint8_t * jmp(int32_t relative_bytes, uint8_t *buf); +uint8_t * jb(int32_t relative_bytes, uint8_t *buf); +uint8_t * jae(int32_t relative_bytes, uint8_t *buf); +uint8_t * je(int32_t relative_bytes, uint8_t *buf); +uint8_t * jne(int32_t relative_bytes, uint8_t *buf); +uint8_t * jbe(int32_t relative_bytes, uint8_t *buf); +uint8_t * ja(int32_t relative_bytes, uint8_t *buf); +uint8_t * jl(int32_t relative_bytes, uint8_t *buf); +uint8_t * jge(int32_t relative_bytes, uint8_t *buf); +uint8_t * jle(int32_t relative_bytes, uint8_t *buf); +uint8_t * jg(int32_t relative_bytes, uint8_t *buf); + +#endif + +3. The demo program: demo.c + + Before delving into the implementation of the assembler, it's probably + helpful to show how this API is used. + + Each function in our API takes a uint8_t pointer buf, writes the + byte(s) of machine code for a single assembly language instruction to + memory starting at that address, then returns a pointer to the next + byte after the instruction that was just assembled. + + For example, the instruction mov eax, 12345678h is assembled into five + bytes of machine code: b8 78 56 34 12. Calling mov_immediate(EAX, + 0x12345678, buf) stores these five bytes into memory at the location + pointed to by buf, then it returns buf+5, which is presumably where + you'll want to store the next instruction. + + For example, suppose you want to assemble the following + three-instruction program. +mov eax, 120h +add eax, ecx +shl eax, 4 + + The following program illustrates how to assemble this sequence of + three instructions, then write the byte values of the resulting machine + code to standard output: +#include "x86asm.h" +#include <stdio.h> + +int main() { + uint8_t bytes[64]; + uint8_t *cur = bytes; + cur = mov_immediate(EAX, 0x120, cur); // mov eax, 120h + cur = add(EAX, ECX, cur); // add eax, ecx + cur = shl(EAX, 4, cur); // shl eax, 4 + + for (uint8_t *p = bytes; p < cur; p++) { + printf("%02x ", *p); + } + printf("\n"); + return 0; +} + + When you run this, the output is: +b8 20 01 00 00 03 c1 c1 e0 04 + +4. The implementation: x86asm.c + + Now, we'll start implementing this API. For each instruction, I'll + describe its machine language encoding, and then the C function that + implements it. + + The definitive, official reference for the x86 instruction set and its + machine language encoding is Volume 2 of the [12]Intel® 64 and IA-32 + Architectures Software Developer Manuals. Unfortunately, Intel's + documentation is not easy to read, so for this small assembler, it will + be sufficient to simply describe the encodings by example. + +No operation - nop + + The nop instruction assembles to a single byte of machine code: 90h. +uint8_t *nop(uint8_t *buf) { + *buf++ = 0x90; + return buf; +} + +Increment and decrement - inc, dec + + The inc instruction adds 1 to a value in a register; dec subtracts 1. + Recall from the header file above (x86asm.h) that we defined an + enumeration with all of the x86's 32-bit registers. +typedef enum { EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI } reg32_t; + + There's a reason we listed the registers in this specific order: when + instructions take register operands, the encodings tend to follow this + same order. Notice the pattern in the encodings of the inc and dec + instructions: + Instruction Encoding (hex) Instruction Encoding (hex) + inc eax 40 dec eax 48 + inc ecx 41 dec ecx 49 + inc edx 42 dec edx 4A + ... ... + inc edi 47 dec edi 4F + + Since our reg32_t enum assigns an integer value to each register name + (EAX=0, ECX=1, EDX=2, etc.), this means we can encode inc register by + simply adding the register number to hexadecimal 40. +uint8_t *inc(reg32_t reg, uint8_t *buf) { + *buf++ = 0x40 + reg; + return buf; +} + +uint8_t *dec(reg32_t reg, uint8_t *buf) { + *buf++ = 0x48 + reg; + return buf; +} + + (It's more conventional to describe encodings in terms of which bits in + the encoding represent the operand register. For example, see Volume 2, + Appendix B of the Intel documentation referenced above. From that + perspective, it might make more sense to build encodings using bitwise + operations. However, I'm writing this blog post from the perspective of + "look at the pattern and implement it;" adding values seems more + intuitive and produces the same result.) + +Move immediate value to register - mov reg, imm + + The following table shows the encodings for mov reg, 1 and + mov reg, 12345678h. Notice the pattern? + Instruction Encoding (hex) Instruction Encoding (hex) + mov eax, 1 B8 01 00 00 00 mov eax, 12345678h B8 78 56 34 12 + mov ecx, 1 B9 01 00 00 00 mov ecx, 12345678h B9 78 56 34 12 + mov edx, 1 BA 01 00 00 00 mov edx, 12345678h BA 78 56 34 12 + ... ... + mov edi, 1 BF 01 00 00 00 mov edi, 12345678h BF 78 56 34 12 + + While the inc and dec instructions had 1-byte encodings, the encoding + here is always 5 bytes. The first byte of the encoding is B8 + the + register number. The next four bytes are the immediate value in little + endian order, i.e., with the low-order byte first. Assuming the + assembler will be run on an x86/x64 processor, which uses little endian + byte ordering natively, nothing special needs to be done to reorder the + bytes--storing a 32-bit value in memory will store the bytes in little + endian order. +uint8_t *mov_immediate(reg32_t dest, int32_t value, uint8_t *buf) { + *buf++ = 0xB8 + dest; + *((int32_t *)buf) = value; buf += sizeof(int32_t); + return buf; +} + +Load value from memory - mov reg, DWORD PTR [reg] + + So far, our instructions have had straightforward encodings with + reasonably obvious patterns. This one gets a bit more interesting. + Instruction Encoding (hex) + mov eax, DWORD PTR [eax] 8B 00 + mov eax, DWORD PTR [ecx] 8B 01 + mov eax, DWORD PTR [edx] 8B 02 + mov eax, DWORD PTR [ebx] 8B 03 + mov eax, DWORD PTR [esp] 8B 04 24 + mov eax, DWORD PTR [ebp] 8B 45 00 + mov eax, DWORD PTR [esi] 8B 06 + mov eax, DWORD PTR [edi] 8B 07 + + mov ecx, DWORD PTR [eax] 8B 08 + mov ecx, DWORD PTR [ecx] 8B 09 + mov ecx, DWORD PTR [edx] 8B 0A + mov ecx, DWORD PTR [ebx] 8B 0B + mov ecx, DWORD PTR [esp] 8B 0C 24 + mov ecx, DWORD PTR [ebp] 8B 4D 00 + mov ecx, DWORD PTR [esi] 8B 0E + mov ecx, DWORD PTR [edi] 8B 0F + + mov edx, DWORD PTR [eax] 8B 10 + mov edx, DWORD PTR [ecx] 8B 11 + ... + mov edi, DWORD PTR [edi] 8B 3F + + This form of the mov instruction has a two-byte encoding with a fairly + obvious pattern, except when the source operand is ESP or EBP... then + it's a three-byte encoding with a not-so-obvious pattern.^1 +uint8_t *mov_from_ptr(reg32_t dest, reg32_t src, uint8_t *buf) { + *buf++ = 0x8B; + if (src == ESP) { + *buf++ = 8*dest + src; + *buf++ = 0x24; + } else if (src == EBP) { + *buf++ = 0x45 + 8*dest; + *buf++ = 0x00; + } else { + *buf++ = 8*dest + src; + } + return buf; +} + +Store value into memory - mov DWORD PTR [reg], reg + + When mov is used to store a value in memory, the encodings are almost + identical to the encodings for loading a value from memory, except the + first byte is 89h and the source and destination operands are reversed + when encoding the second byte. + Instruction Encoding (hex) + mov DWORD PTR [eax], eax 89 00 + mov DWORD PTR [ecx], eax 89 01 + mov DWORD PTR [edx], eax 89 02 + mov DWORD PTR [ebx], eax 89 03 + mov DWORD PTR [esp], eax 89 04 24 + mov DWORD PTR [ebp], eax 89 45 00 + mov DWORD PTR [esi], eax 89 06 + mov DWORD PTR [edi], eax 89 07 + mov DWORD PTR [eax], ecx 89 08 + mov DWORD PTR [ecx], ecx 89 09 + mov DWORD PTR [edx], ecx 89 0A + mov DWORD PTR [ebx], ecx 89 0B + mov DWORD PTR [esp], ecx 89 0C 24 + mov DWORD PTR [ebp], ecx 89 4D 00 + mov DWORD PTR [esi], ecx 89 0E + mov DWORD PTR [edi], ecx 89 0F + mov DWORD PTR [eax], edx 89 10 + mov DWORD PTR [ecx], edx 89 11 + ... + mov DWORD PTR [edi], edi 89 3F +uint8_t *mov_to_ptr(reg32_t dest, reg32_t src, uint8_t *buf) { + *buf++ = 0x89; + if (dest == ESP) { + *buf++ = 8*src + dest; + *buf++ = 0x24; + } else if (dest == EBP) { + *buf++ = 0x45 + 8*src; + *buf++ = 0x00; + } else { + *buf++ = 8*src + dest; + } + return buf; +} + +RM-encoded instructions: mov, add, sub, and, or, xor, cmp + + Next, we will tackle register-register mov, as well as add, sub, and, + or, xor, and cmp. All of these instructions have a similar encoding: an + opcode byte (that differs from one instruction to the next - hence the + name, "operation code"), followed by a single byte indicating the + source and destination registers. + + To see the pattern, consider mov and add: + Instruction Encoding (hex) Instruction Encoding (hex) + mov eax, eax 8B C0 add eax, eax 03 C0 + mov eax, ecx 8B C1 add eax, ecx 03 C1 + mov eax, edx 8B C2 add eax, edx 03 C2 + ... ... + mov eax, edi 8B C7 add eax, edi 03 C7 + mov ecx, eax 8B C8 add ecx, eax 03 C8 + mov ecx, ecx 8B C9 add ecx, ecx 03 C9 + ... ... + mov ecx, edi 8B CF add ecx, edi 03 CF + mov edx, eax 8B D0 add edx, eax 03 D0 + ... ... + mov edi, edi 8B FF add edi, edi 03 FF + + The second byte of the encoding is hex C0, plus 8 times the destination + register number, plus the source register number. +#define DEFINE_INSN_RM(mnemonic, opcode) \ +uint8_t *mnemonic(reg32_t dest, reg32_t src, uint8_t *buf) { \ + *buf++ = opcode; \ + *buf++ = 8*dest + 0xC0 + src; \ + return buf; \ +} + +DEFINE_INSN_RM(mov, 0x8B) +DEFINE_INSN_RM(add, 0x03) +DEFINE_INSN_RM(sub, 0x2B) +DEFINE_INSN_RM(and, 0x23) +DEFINE_INSN_RM( or, 0x0B) +DEFINE_INSN_RM(xor, 0x33) +DEFINE_INSN_RM(cmp, 0x3B) + +Instructions with opcodes beginning with F7: not, neg, mul, imul, div, idiv + + The not, neg, mul, imul, div, and idiv instructions also have similar + encodings. The first byte of the encoding is F7. The second byte + indicates both the operation and the operand (register). + Instruction Encoding (hex) Instruction Encoding (hex) + not eax F7 D0 neg eax F7 D8 + not ecx F7 D1 neg ecx F7 D9 + ... + not edi F7 D7 neg edi F7 DF + + As a note, we named the C function for the div instruction div_, since + the C standard library's stdlib.h includes the [13]div(3) instruction. +#define DEFINE_INSN_F7(mnemonic, reg_base) \ +uint8_t *mnemonic(reg32_t reg, uint8_t *buf) { \ + *buf++ = 0xF7; \ + *buf++ = reg_base + reg; \ + return buf; \ +} + +DEFINE_INSN_F7( not, 0xD0) +DEFINE_INSN_F7( neg, 0xD8) +DEFINE_INSN_F7( mul, 0xE0) +DEFINE_INSN_F7(imul, 0xE8) +DEFINE_INSN_F7(div_, 0xF0) +DEFINE_INSN_F7(idiv, 0xF8) + +Convert doubleword to quadword - cdq + + Both the div and idiv instructions take a 64-bit dividend (with the + high 32 bits in EDX and the low 32 bits in EAX) and divide it by a + 32-bit divisor (the register operand). To divide two 32-bit values, the + dividend must be extended to 64 bits. For unsigned division (div), this + is easy: mov edx, 0. For signed division (idiv), the 32-bit value must + be sign-extended to 64 bits. This is done by the cdq instruction: it + copies the sign bit of EAX into all 32 bits of EDX. +uint8_t *cdq(uint8_t *buf) { + *buf++ = 0x99; + return buf; +} + +Bit shift instructions - shl, shr, sar + + The bit shift instructions are interesting for two reasons: + * The number of bits to shift can be an immediate value (0-255), or + it can be stored in the CL register (another name for the lowest 8 + bits of the ECX register). + * The encoding for a one-bit shift is different. + + Using the left shift instruction as an example: + Instruction Encoding (hex) Instruction Encoding (hex) + shl eax, 0 C1 E0 00 shl eax, cl D3 E0 + shl eax, 1 D1 E0 shl ecx, cl D3 E1 + shl eax, 2 C1 E0 02 shl edx, cl D3 E2 + shl eax, 3 C1 E0 03 shl ebx, cl D3 E3 + ... ... + shl ecx, 0FFh C1 E1 FF + shl ecx, 0 C1 E1 00 + shl ecx, 1 D1 E1 + shl ecx, 2 C1 E1 02 + shl ecx, 3 C1 E1 03 + ... + shl ecx, 0FFh C1 E1 FF + ... + + We can implement this in our assembler as follows. +#define DEFINE_INSN_D1C1(mnemonic, reg_base) \ +uint8_t *mnemonic(reg32_t reg, uint8_t bits, uint8_t *buf) { \ + switch (bits) { \ + case 1: /* 1-bit shifts have a different opcode */ \ + *buf++ = 0xD1; \ + *buf++ = reg_base + reg; \ + break; \ + default: \ + *buf++ = 0xC1; \ + *buf++ = reg_base + reg; \ + *buf++ = bits; \ + } \ + return buf; \ +} \ +uint8_t *mnemonic##_cl(reg32_t reg, uint8_t *buf) { \ + *buf++ = 0xD3; \ + *buf++ = reg_base + reg; \ + return buf; \ +} + +DEFINE_INSN_D1C1(shl, 0xE0) +DEFINE_INSN_D1C1(shr, 0xE8) +DEFINE_INSN_D1C1(sar, 0xF8) + +Procedure calls: push, pop, call, ret + + The push, pop, call, and ret instructions are the four essential + instructions for procedure calls. Their encodings follow similar + patterns to those we've already seen, except with different opcode + bytes. +uint8_t *push(reg32_t reg, uint8_t *buf) { + *buf++ = 0x50 + reg; + return buf; +} + +uint8_t *pop(reg32_t reg, uint8_t *buf) { + *buf++ = 0x58 + reg; + return buf; +} + +uint8_t *call(reg32_t reg, uint8_t *buf) { + *buf++ = 0xFF; + *buf++ = 0xD0 + reg; + return buf; +} + + The encoding of ret is only slightly more interesting, since ret 0 + (which is often written as ret with no operand) is encoded differently + than ret with a nonzero immediate operand, such as ret 4 or ret 16. +uint8_t *ret(uint16_t bytes, uint8_t *buf) { + if (bytes == 0) { + *buf++ = 0xC3; + } else { + *buf++ = 0xC2; + *((uint16_t *)buf) = bytes; buf += sizeof(uint16_t); + } + return buf; +} + +Jumps + + In x86 assembly language, jumps are usually written with labels. For + example: +there: mov eax, 12345678h ; b8 78 56 34 12 + jmp there ; eb f9 + nop ; 90 + + Recall that the EIP register is the instruction pointer. When the + processor fetches an instruction to execute, it increments EIP to point + to the following instruction. A jump changes the value of EIP. In our + example, the effect of the jump is to move EIP backward by 7 bytes, so + it will point to the start of the mov instruction. + EIP is here after the processor + fetches the "jmp there" instruction + |v +B8 78 56 34 12 EB F9 90 +^|___________________________| +We want to move it 7 bytes backward +to place it here + + So, how is jmp encoded? Hex F9 is the two's complement representation + of -7... so the encoding above (EB F9) is in essence "jump -7 bytes." + + Complicating things slightly, the jmp instruction is encoded with an EB + opcode byte if the jump distance is between -128 and 127 bytes, + inclusive, and with an E9 opcode if the jump distance is larger than + that. +uint8_t *jmp(int32_t bytes, uint8_t *buf) { + if (INT8_MIN <= bytes && bytes <= INT8_MAX) { + *buf++ = 0xEB; + *buf++ = (int8_t)bytes; + } else { + *buf++ = 0xE9; + *((int32_t *)buf) = bytes; buf += sizeof(int32_t); + } + return buf; +} + + Conditional jumps are encoded similarly, except with different opcodes, + of course. +#define DEFINE_INSN_JCC(mnemonic, byte_opcode) \ +uint8_t *mnemonic(int32_t bytes, uint8_t *buf) { \ + if (INT8_MIN <= bytes && bytes <= INT8_MAX) { \ + *buf++ = byte_opcode; \ + *buf++ = (int8_t)bytes; \ + } else { \ + *buf++ = 0x0F; \ + *buf++ = byte_opcode + 0x10; \ + *((int32_t *)buf) = bytes; buf += sizeof(int32_t); \ + } \ + return buf; \ +} + +DEFINE_INSN_JCC( jb, 0x72) +DEFINE_INSN_JCC(jae, 0x73) +DEFINE_INSN_JCC( je, 0x74) +DEFINE_INSN_JCC(jne, 0x75) +DEFINE_INSN_JCC(jbe, 0x76) +DEFINE_INSN_JCC( ja, 0x77) +DEFINE_INSN_JCC( jl, 0x7C) +DEFINE_INSN_JCC(jge, 0x7D) +DEFINE_INSN_JCC(jle, 0x7E) +DEFINE_INSN_JCC( jg, 0x7F) + +5. What's next? + + So, we have a working x86 assembler. Not bad for 256 lines of code. You + can download the complete source code below. + + In the next few posts, we'll: + * show how to test this assembler (are you sure it actually works?). + * show how to find the encodings of other instructions (in case you + want to extend this assembler). + * show how to actually execute the generated machine code. + + At some point in the future - maybe not right away - I'd like to + * show how the Builder design pattern can make the assembler easier + to use. + * build an x64 assembler (since you're probably not running a 32-bit + machine). + + But there are plenty of other non-assembler-related topics I'd like to + blog about, so let's see what actually materializes. + +Download the source code + + Source Code: [14]x86asm.h 69 lines + [15]x86asm.c 171 lines + [16]demo.c 16 lines + Total: 256 lines + Makefiles: [17]GNUmakefile (GNU Make on Linux/macOS) + [18]Makefile (NMAKE on Windows) + + ^1 If you're familiar with the x86 encoding scheme, [EBP] is actually + encoded as [EBP+0] (i.e., EBP with an 8-bit displacement), and ESP is + encoded using the SIB byte. + + Published on 15 Jan 2017 o 4019 words o Comments? [19]E-mail me! + + Copyright © 2017 Jeffrey L. Overbey. All rights reserved. Except for + source code where an explicit license is given, no part of this blog + may be copied, reproduced, published, translated, or distributed, in + whole or in part, without the written permission of the copyright + owner. + +References + + 1. http://blog.jeff.over.bz/rss.xml + 2. http://jeff.over.bz/ + 3. http://blog.jeff.over.bz/refactoring/golang/godoctor/2018/04/22/building-a-godoctor-refactoring.html + 4. http://blog.jeff.over.bz/compilers/lexers/2017/09/09/lexical-analysis.html + 5. http://blog.jeff.over.bz/performance/statistics/2017/06/01/on-performance-improvements.html + 6. http://blog.jeff.over.bz/assembly/compilers/jit/2017/03/30/executing-dynamically-generated-machine-code.html + 7. http://blog.jeff.over.bz/assembly/2017/02/15/finding-machine-language-encodings.html + 8. http://blog.jeff.over.bz/assembly/compilers/jit/2017/01/15/x86-assembler.html + 9. http://blog.jeff.over.bz/blog/2016/12/15/first-post.html + 10. http://blog.jeff.over.bz/rss.xml + 11. http://jeff.over.bz/ + 12. https://software.intel.com/en-us/articles/intel-sdm + 13. https://linux.die.net/man/3/div + 14. http://blog.jeff.over.bz/_posts/code/x86-assembler/x86asm.h + 15. http://blog.jeff.over.bz/_posts/code/x86-assembler/x86asm.c + 16. http://blog.jeff.over.bz/_posts/code/x86-assembler/demo.c + 17. http://blog.jeff.over.bz/_posts/code/x86-assembler/GNUmakefile + 18. http://blog.jeff.over.bz/_posts/code/x86-assembler/Makefile + 19. http://www.google.com/recaptcha/mailhide/d?k=01Y7tF2jw9w9xLZucF314wPA==&c=q6vUJyh7NztFNzwTojRuECzwRgJ8lixJT3LvIi58VCM= |