21 files changed, 8438 insertions, 19 deletions
diff --git a/BUGS b/BUGS
index bd51c2d..f9c5146 100644
--- a/BUGS
+++ b/BUGS
@@ -1,3 +1,4 @@
+- Asus EEPC 701 doesn't boot, pendrive 16 GB 0x000 0x0012
 - VGA power cycles in bochs and on unisys
   (0) [0x0000fffffff0] f000:fff0 (unk. ctxt): jmpf 0xf000:e05b          ; ea5be000f0
   this seems to be the BIOS entry point
diff --git a/TODOS b/TODOS
index 89217b6..9cdbcaf 100644
--- a/TODOS
+++ b/TODOS
@@ -4,5 +4,4 @@
   - make it boot from disks, USB-pen drives, etc.
 - make the VGA mode switch back to text less hacky (following the
   modes.c guide)
-  
-
+- serial console over gdb and x/y/zmodem file transfers before doing network stuff?
diff --git a/doc/FEEDBACK.TYNDUR b/doc/FEEDBACK.TYNDUR
new file mode 100644
index 0000000..f9d57fb
--- /dev/null
+++ b/doc/FEEDBACK.TYNDUR
@@ -0,0 +1,18 @@
+rtl8139 driver
+
+    // Warten bis der Reset beendet ist
+    while ((read_register_byte(netcard, REG_COMMAND) & REG_COMMAND) ==
+        CR_RESET);
+
+    // docu says reading ISR clears the interrupts, writing has no effect:
+    write_register_word(netcard, REG_INTERRUPT_STATUS, 0);
+    // so this is wrong
+
+    write_register_word(netcard, REG_INTERRUPT_MASK, 0xFFFF);
+
+    // not quite sure, but I would use an uint16_t as we read a word, not a dword
+    // from ISR
+    uint32_t isr = read_register_word(netcard, REG_INTERRUPT_STATUS);
+
+    // where is DMA (PCI bus mastering) enabled? not visible in driver, quite
+    // confusing for the newbie
+\ No newline at end of file
diff --git a/doc/FEEDBACK.WYOOS b/doc/FEEDBACK.WYOOS
new file mode 100644
index 0000000..f443502
--- /dev/null
+++ b/doc/FEEDBACK.WYOOS
@@ -0,0 +1,85 @@
+Ep 12.
+- bad design: well, mapped memory area was small on a PC (19-bit),
+  when VGA came along. Other platforms already had 24-bit or 32-bit
+  address buses. This and the (in)famous I/O ports make things look
+  bad. It was a cheap solution for an already established architecture,
+  which had to remain backwards compatible (to things like EGA and CGA).
+- so many ports, this is because it was an "array of chips", all
+  with their own port (video graphics array)
+-         case 0<<2: return (uint8_t*)0x00000;
+  -> I read docu A0000
+- strange bar at the bottom of the screen (in qemu), it seems, the
+  fill-rectangle stops before reaching the bottom. Timing issue in
+  emulators (bochs and qemu), happens with tcc/gcc, not with clang
+  (optimizations?)
+- Some more links on VGA programming without a BIOS
+  http://bos.asmhackers.net/docs/vga_without_bios/docs/
+
+Ep. 13:
+- Don't agree with "everything implement as pixel"! See osdev comments
+  on that. At least a note on, yes we do it here for educational reasons
+  the same way we are using 320x200x8 and not something multi-planar
+  would be an idea.
+- Why color as property of a widget?
+- Why not call the base method in composite widget getFocus?
+- Colors: shouldn't they be 0000aa instead of 0000a8?
+
+Ep. 14:
+- I would actually do a console doing serial/VGA text and then later
+  VGA graphics console (like xconsole), mentioned at the end of the
+  video. good.
+- the mouse cursor gets redrawn? I would actually introduce the concept
+  of sprites and memory to VGA masking for fonts and the mouse cursor
+- Redrawing the whole desktop when the mouse cursor moves?
+- Do not test in putpixel, make it fatal, make sure the mouse drawing
+  routine protects against writing outside of the screen. I don't
+  think automatically cutting off drawing areas is the way to go, especially
+  if partial redraws must be implemented later.
+- Z-buffers would be nice to introduce, but as there is no memory
+  manager yet we cannot allocate 320x200=64k yet. Could we implement
+  an alloca? Or just a fixed buffer in the video driver?
+- window move doesn't cover borders and corner cases like when the
+  mouse leaves the area of the window while dragging (thusly requiring
+  us to disable dragging)
+- should a desktop paint the mouse cursor and implement a mouse handler?
+  This also means the interrupt handler directly calls GUI routines, this
+  cannot be good. A realistic design would implement an event queue here..
+
+Ep. 15
+- eflags 0x202, magic?! what does it mean? Also iret should be explained
+  once more in order to understand the packed structure CPUState
+- we cannot get back to the main stack, we should have a schedule_stop,
+  so we can go back into the main thread and terminate the kernel properly
+- not very obvious this is IRQ 0:
+  if(interrupt == hardwareInterruptOffset)
+  better explicit
+
+Ep. 16
+- how to get a good place for the heap
+  maybe it's part of the multiboot grub structure?
+  or we need to ask the bios for the memory layout?
+
+Ep. 17
+- did another network driver RTL8137, deviating heavily from tutorial, also
+  answering QEMU ARP requests first (in user network mode)
+- malloc -> memalign or aligned_alloc instead of playing with +16 &0f and stuff
+  like that
+- garbage descriptors, yes, it is weird, but in hardware there are usually
+  reasons, why it is done like this.
+
+Ep. 18
+- qemu and a user network with
+  -netdev user,id=net0,net=10.0.0.0/24,host=10.0.0.2,dhcpstart=10.0.0.16,hostfwd=tcp::8080-:80,hostfwd=udp::8081-:81
+  gives us very fast access to sort of a pre-configured network
+  This gives the ARP on the first UDP/TCP packet
+
+Ep. A01
+- why 65535 handlers? So many? One for each of the possible uint16_t ethernet types,
+  this means wasting 128 for a really sparse switch.
+  -> I would do a hash from protocols to slots, from slots to handlers
+- wireshark and tcpdump for debugging
+
+Ep.20
+- user space, Oberon has none. syscalls are there to sanitize values and calls from
+  user processes
+
diff --git a/doc/LINKS.TODO b/doc/LINKS.TODO
index 9659164..c7e0c4d 100644
--- a/doc/LINKS.TODO
+++ b/doc/LINKS.TODO
@@ -14,9 +14,11 @@ http://x86.renejeschke.de/html/file_module_x86_id_139.html
 http://retired.beyondlogic.org/serial/serial1.htm
 https://pdos.csail.mit.edu/6.828/2014/reference.html
 
-other projects
-
+https://visopsys.org/osdev/
+sortix, soso, managarm (mlibc)
 https://bitbucket.org/adamholisky/versionone 
+https://collapseos.org/
+https://github.com/mikaku/Fiwix
 
 tutorials:
 
@@ -26,6 +28,7 @@ http://www.brokenthorn.com/Resources/OSDevIndex.html
 - fitting code following the brokenthorm tutorial:
   https://github.com/tuhdo/os-study
 http://www.fysnet.net/
+http://www.brokenthorn.com/Resources/
 
 In C#:
 https://github.com/FlingOS/FlingOS
@@ -33,6 +36,9 @@ https://github.com/FlingOS/FlingOS
 In Rust:
 http://os.phil-opp.com/
 
+OS with strictly typed language:
+http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/
+
 Interrupts:
 http://wiki.osdev.org/Exceptions
 http://wiki.osdev.org/Interrupt_Descriptor_Table
@@ -84,6 +90,7 @@ http://www.uruk.org/orig-grub/PC_partitioning.txt
 http://wiki.osdev.org/ATA_in_x86_RealMode_%28BIOS%29
 other Boot (not used, but nice other projects):
 https://github.com/vladimirfedorov/bootsector.git: stage1 with FAT, using FASM
+https://github.com/civboot/civboot
 
 C:
 http://www.drdobbs.com/extending-c-for-object-oriented-programm/184402731
@@ -99,6 +106,8 @@ http://files.osdev.org/mirrors/geezer/osd/graphics/modes.c
 http://www.osdever.net/FreeVGA/home.htm
 http://wiki.osdev.org/VGA_Fonts
 http://bos.asmhackers.net/docs/vga_without_bios/
+abaos: daveho https://int10h.org/oldschool-pc-fonts/fontlist/
+https://os.phil-opp.com/vga-text-mode/
 
 GUI:
 https://github.com/vurtun/nuklear
@@ -138,3 +147,42 @@ AX, BX software interrupt 0x80, classic Linux/DOS/Windows syscalls
 
 UTP/Netconsole:
 https://www.cyberciti.biz/tips/linux-netconsole-log-management-tutorial.html
+
+intro in gas and nasm
+https://developer.ibm.com/articles/l-gas-nasm/
+
+https://github.com/wbenny/ia32-doc
+
+https://www.jwz.org/doc/worse-is-better.html
+
+http://www.independent-software.com/operating-system-development-jumping-to-protected-mode.html
+
+https://wiki.osdev.org/Bare_Bones
+https://wiki.osdev.org/OS_Specific_Toolchain
+https://wiki.osdev.org/GCC_Cross-Compiler
+BOOTBOOT
+https://forum.osdev.org/viewtopic.php?f=2&t=33362
+
+https://github.com/jtsiomb/pcboot/
+minimal 32-bit protected mode booting kernel
+
+https://github.com/Supercip971/WingOS
+
+boot loader
+https://github.com/limine-bootloader/limine
+q-word OS:
+https://github.com/qword-os/echfs
+https://github.com/qword-os/lvlibc
+https://github.com/Supercip971/WingOS_x64.git
+
+echfs, simple filesystem
+https://github.com/echfs/echfs, 32-bit port needed
+
+prex real time embedded os:
+https://github.com/AndrewD/prex
+
+https://github.com/spacerace/romfont
+
+etherboot, gPXE, iPXE, HTTP boot:
+https://sites.google.com/site/richardgroux/infrastructure-1/Network/pxe-boot/what-is-gpxe-1
+linux floppy code has been removed in 2003, had a 1MB limit issue anyway
diff --git a/doc/README.ELF b/doc/README.ELF
new file mode 100644
index 0000000..253e976
--- /dev/null
+++ b/doc/README.ELF
@@ -0,0 +1,6 @@
+http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
+http://virus.enemy.org/virus-writing-HOWTO/_html/i386-redhat8.0-linux/language.of.evil.html
+http://timelessname.com/elfbin/
+https://chris.bracken.jp/2018/10/decoding-an-elf-binary/
+http://archive.main.lv/writeup/create_elf_file_from_scratch.html
+https://www.cs.kent.ac.uk/people/staff/srk21/blog/devel/
diff --git a/doc/README.Others b/doc/README.Others
index a28f472..3b6ef0e 100644
--- a/doc/README.Others
+++ b/doc/README.Others
@@ -3,3 +3,8 @@ interesting operating systems with quite some differing approaches:
 OS/ Eumel
 9P: network filesystem protocol, Plan 9, Styx, expose everything as simple virtual file systems
 https://en.wikipedia.org/wiki/Inferno_(operating_system)
+
+https://littleosbook.github.io/
+https://klacansky.com/notes/useless-operating-system.html
+
+https://github.com/cirosantilli/x86-bare-metal-examples
diff --git a/doc/README.Shell b/doc/README.Shell
new file mode 100644
index 0000000..b24f205
--- /dev/null
+++ b/doc/README.Shell
@@ -0,0 +1 @@
+https://brennan.io/2015/01/16/write-a-shell-in-c/
diff --git a/doc/README.UEFI b/doc/README.UEFI
new file mode 100644
index 0000000..4584e60
--- /dev/null
+++ b/doc/README.UEFI
@@ -0,0 +1,2 @@
+https://www.kraxel.org/repos/images/
+http://www.rodsbooks.com/efi-programming/hello.html
diff --git a/doc/brennan.io_2015_01_16_write-a-shell-in-c.txt b/doc/brennan.io_2015_01_16_write-a-shell-in-c.txt
new file mode 100644
index 0000000..22e23da
--- /dev/null
+++ b/doc/brennan.io_2015_01_16_write-a-shell-in-c.txt
@@ -0,0 +1,636 @@
+   [ ]
+     * [1]Stephen Brennan
+     * [2]Blog
+     * [3]Projects
+     * [4]Resume
+
+Tutorial - Write a Shell in C
+
+   Stephen Brennan o 16 January 2015
+
+   It's easy to view yourself as "not a real programmer." There are
+   programs out there that everyone uses, and it's easy to put their
+   developers on a pedestal. Although developing large software projects
+   isn't easy, many times the basic idea of that software is quite simple.
+   Implementing it yourself is a fun way to show that you have what it
+   takes to be a real programmer. So, this is a walkthrough on how I wrote
+   my own simplistic Unix shell in C, in the hopes that it makes other
+   people feel that way too.
+
+   The code for the shell described here, dubbed lsh, is available on
+   [5]GitHub.
+
+   University students beware! Many classes have assignments that ask you
+   to write a shell, and some faculty are aware of this tutorial and code.
+   If you're a student in such a class, you shouldn't copy (or copy then
+   modify) this code without permission. And even then, I would [6]advise
+   against heavily relying on this tutorial.
+
+Basic lifetime of a shell
+
+   Let's look at a shell from the top down. A shell does three main things
+   in its lifetime.
+     * Initialize: In this step, a typical shell would read and execute
+       its configuration files. These change aspects of the shell's
+       behavior.
+     * Interpret: Next, the shell reads commands from stdin (which could
+       be interactive, or a file) and executes them.
+     * Terminate: After its commands are executed, the shell executes any
+       shutdown commands, frees up any memory, and terminates.
+
+   These steps are so general that they could apply to many programs, but
+   we're going to use them for the basis for our shell. Our shell will be
+   so simple that there won't be any configuration files, and there won't
+   be any shutdown command. So, we'll just call the looping function and
+   then terminate. But in terms of architecture, it's important to keep in
+   mind that the lifetime of the program is more than just looping.
+int main(int argc, char **argv)
+{
+  // Load config files, if any.
+
+  // Run command loop.
+  lsh_loop();
+
+  // Perform any shutdown/cleanup.
+
+  return EXIT_SUCCESS;
+}
+
+   Here you can see that I just came up with a function, lsh_loop(), that
+   will loop, interpreting commands. We'll see the implementation of that
+   next.
+
+Basic loop of a shell
+
+   So we've taken care of how the program should start up. Now, for the
+   basic program logic: what does the shell do during its loop? Well, a
+   simple way to handle commands is with three steps:
+     * Read: Read the command from standard input.
+     * Parse: Separate the command string into a program and arguments.
+     * Execute: Run the parsed command.
+
+   Here, I'll translate those ideas into code for lsh_loop():
+void lsh_loop(void)
+{
+  char *line;
+  char **args;
+  int status;
+
+  do {
+    printf("> ");
+    line = lsh_read_line();
+    args = lsh_split_line(line);
+    status = lsh_execute(args);
+
+    free(line);
+    free(args);
+  } while (status);
+}
+
+   Let's walk through the code. The first few lines are just declarations.
+   The do-while loop is more convenient for checking the status variable,
+   because it executes once before checking its value. Within the loop, we
+   print a prompt, call a function to read a line, call a function to
+   split the line into args, and execute the args. Finally, we free the
+   line and arguments that we created earlier. Note that we're using a
+   status variable returned by lsh_execute() to determine when to exit.
+
+Reading a line
+
+   Reading a line from stdin sounds so simple, but in C it can be a
+   hassle. The sad thing is that you don't know ahead of time how much
+   text a user will enter into their shell. You can't simply allocate a
+   block and hope they don't exceed it. Instead, you need to start with a
+   block, and if they do exceed it, reallocate with more space. This is a
+   common strategy in C, and we'll use it to implement lsh_read_line().
+#define LSH_RL_BUFSIZE 1024
+char *lsh_read_line(void)
+{
+  int bufsize = LSH_RL_BUFSIZE;
+  int position = 0;
+  char *buffer = malloc(sizeof(char) * bufsize);
+  int c;
+
+  if (!buffer) {
+    fprintf(stderr, "lsh: allocation error\n");
+    exit(EXIT_FAILURE);
+  }
+
+  while (1) {
+    // Read a character
+    c = getchar();
+
+    // If we hit EOF, replace it with a null character and return.
+    if (c == EOF || c == '\n') {
+      buffer[position] = '\0';
+      return buffer;
+    } else {
+      buffer[position] = c;
+    }
+    position++;
+
+    // If we have exceeded the buffer, reallocate.
+    if (position >= bufsize) {
+      bufsize += LSH_RL_BUFSIZE;
+      buffer = realloc(buffer, bufsize);
+      if (!buffer) {
+        fprintf(stderr, "lsh: allocation error\n");
+        exit(EXIT_FAILURE);
+      }
+    }
+  }
+}
+
+   The first part is a lot of declarations. If you hadn't noticed, I
+   prefer to keep the old C style of declaring variables before the rest
+   of the code. The meat of the function is within the (apparently
+   infinite) while (1) loop. In the loop, we read a character (and store
+   it as an int, not a char, that's important! EOF is an integer, not a
+   character, and if you want to check for it, you need to use an int.
+   This is a common beginner C mistake.). If it's the newline, or EOF, we
+   null terminate our current string and return it. Otherwise, we add the
+   character to our existing string.
+
+   Next, we see whether the next character will go outside of our current
+   buffer size. If so, we reallocate our buffer (checking for allocation
+   errors) before continuing. And that's really it.
+
+   Those who are intimately familiar with newer versions of the C library
+   may note that there is a getline() function in stdio.h that does most
+   of the work we just implemented. To be completely honest, I didn't know
+   it existed until after I wrote this code. This function was a GNU
+   extension to the C library until 2008, when it was added to the
+   specification, so most modern Unixes should have it now. I'm leaving my
+   existing code the way it is, and I encourage people to learn it this
+   way first before using getline. You'd be robbing yourself of a learning
+   opportunity if you didn't! Anyhow, with getline, the function becomes
+   easier:
+char *lsh_read_line(void)
+{
+  char *line = NULL;
+  ssize_t bufsize = 0; // have getline allocate a buffer for us
+
+  if (getline(&line, &bufsize, stdin) == -1){
+    if (feof(stdin)) {
+      exit(EXIT_SUCCESS);  // We recieved an EOF
+    } else  {
+      perror("readline");
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  return line;
+}
+
+   This is not 100% trivial because we still need to check for EOF or
+   errors while reading. EOF (end of file) means that either we were
+   reading commands from a text file which we've reached the end of, or
+   the user typed Ctrl-D, which signals end-of-file. Either way, it means
+   we should exit successfully, and if any other error occurs, we should
+   fail after printing the error.
+
+Parsing the line
+
+   OK, so if we look back at the loop, we see that we now have implemented
+   lsh_read_line(), and we have the line of input. Now, we need to parse
+   that line into a list of arguments. I'm going to make a glaring
+   simplification here, and say that we won't allow quoting or backslash
+   escaping in our command line arguments. Instead, we will simply use
+   whitespace to separate arguments from each other. So the command echo
+   "this message" would not call echo with a single argument this message,
+   but rather it would call echo with two arguments: "this and message".
+
+   With those simplifications, all we need to do is "tokenize" the string
+   using whitespace as delimiters. That means we can break out the classic
+   library function strtok to do some of the dirty work for us.
+#define LSH_TOK_BUFSIZE 64
+#define LSH_TOK_DELIM " \t\r\n\a"
+char **lsh_split_line(char *line)
+{
+  int bufsize = LSH_TOK_BUFSIZE, position = 0;
+  char **tokens = malloc(bufsize * sizeof(char*));
+  char *token;
+
+  if (!tokens) {
+    fprintf(stderr, "lsh: allocation error\n");
+    exit(EXIT_FAILURE);
+  }
+
+  token = strtok(line, LSH_TOK_DELIM);
+  while (token != NULL) {
+    tokens[position] = token;
+    position++;
+
+    if (position >= bufsize) {
+      bufsize += LSH_TOK_BUFSIZE;
+      tokens = realloc(tokens, bufsize * sizeof(char*));
+      if (!tokens) {
+        fprintf(stderr, "lsh: allocation error\n");
+        exit(EXIT_FAILURE);
+      }
+    }
+
+    token = strtok(NULL, LSH_TOK_DELIM);
+  }
+  tokens[position] = NULL;
+  return tokens;
+}
+
+   If this code looks suspiciously similar to lsh_read_line(), it's
+   because it is! We are using the same strategy of having a buffer and
+   dynamically expanding it. But this time, we're doing it with a
+   null-terminated array of pointers instead of a null-terminated array of
+   characters.
+
+   At the start of the function, we begin tokenizing by calling strtok. It
+   returns a pointer to the first token. What strtok() actually does is
+   return pointers to within the string you give it, and place \0 bytes at
+   the end of each token. We store each pointer in an array (buffer) of
+   character pointers.
+
+   Finally, we reallocate the array of pointers if necessary. The process
+   repeats until no token is returned by strtok, at which point we
+   null-terminate the list of tokens.
+
+   So, once all is said and done, we have an array of tokens, ready to
+   execute. Which begs the question, how do we do that?
+
+How shells start processes
+
+   Now, we're really at the heart of what a shell does. Starting processes
+   is the main function of shells. So writing a shell means that you need
+   to know exactly what's going on with processes and how they start.
+   That's why I'm going to take us on a short diversion to discuss
+   processes in Unix-like operating systems.
+
+   There are only two ways of starting processes on Unix. The first one
+   (which almost doesn't count) is by being Init. You see, when a Unix
+   computer boots, its kernel is loaded. Once it is loaded and
+   initialized, the kernel starts only one process, which is called Init.
+   This process runs for the entire length of time that the computer is
+   on, and it manages loading up the rest of the processes that you need
+   for your computer to be useful.
+
+   Since most programs aren't Init, that leaves only one practical way for
+   processes to get started: the fork() system call. When this function is
+   called, the operating system makes a duplicate of the process and
+   starts them both running. The original process is called the "parent",
+   and the new one is called the "child". fork() returns 0 to the child
+   process, and it returns to the parent the process ID number (PID) of
+   its child. In essence, this means that the only way for new processes
+   to start is by an existing one duplicating itself.
+
+   This might sound like a problem. Typically, when you want to run a new
+   process, you don't just want another copy of the same program - you
+   want to run a different program. That's what the exec() system call is
+   all about. It replaces the current running program with an entirely new
+   one. This means that when you call exec, the operating system stops
+   your process, loads up the new program, and starts that one in its
+   place. A process never returns from an exec() call (unless there's an
+   error).
+
+   With these two system calls, we have the building blocks for how most
+   programs are run on Unix. First, an existing process forks itself into
+   two separate ones. Then, the child uses exec() to replace itself with a
+   new program. The parent process can continue doing other things, and it
+   can even keep tabs on its children, using the system call wait().
+
+   Phew! That's a lot of information, but with all that background, the
+   following code for launching a program will actually make sense:
+int lsh_launch(char **args)
+{
+  pid_t pid, wpid;
+  int status;
+
+  pid = fork();
+  if (pid == 0) {
+    // Child process
+    if (execvp(args[0], args) == -1) {
+      perror("lsh");
+    }
+    exit(EXIT_FAILURE);
+  } else if (pid < 0) {
+    // Error forking
+    perror("lsh");
+  } else {
+    // Parent process
+    do {
+      wpid = waitpid(pid, &status, WUNTRACED);
+    } while (!WIFEXITED(status) && !WIFSIGNALED(status));
+  }
+
+  return 1;
+}
+
+   Alright. This function takes the list of arguments that we created
+   earlier. Then, it forks the process, and saves the return value. Once
+   fork() returns, we actually have two processes running concurrently.
+   The child process will take the first if condition (where pid == 0).
+
+   In the child process, we want to run the command given by the user. So,
+   we use one of the many variants of the exec system call, execvp. The
+   different variants of exec do slightly different things. Some take a
+   variable number of string arguments. Others take a list of strings.
+   Still others let you specify the environment that the process runs
+   with. This particular variant expects a program name and an array (also
+   called a vector, hence the `v') of string arguments (the first one has
+   to be the program name). The `p' means that instead of providing the
+   full file path of the program to run, we're going to give its name, and
+   let the operating system search for the program in the path.
+
+   If the exec command returns -1 (or actually, if it returns at all), we
+   know there was an error. So, we use perror to print the system's error
+   message, along with our program name, so users know where the error
+   came from. Then, we exit so that the shell can keep running.
+
+   The second condition (pid < 0) checks whether fork() had an error. If
+   so, we print it and keep going - there's no handling that error beyond
+   telling the user and letting them decide if they need to quit.
+
+   The third condition means that fork() executed successfully. The parent
+   process will land here. We know that the child is going to execute the
+   process, so the parent needs to wait for the command to finish running.
+   We use waitpid() to wait for the process's state to change.
+   Unfortunately, waitpid() has a lot of options (like exec()). Processes
+   can change state in lots of ways, and not all of them mean that the
+   process has ended. A process can either exit (normally, or with an
+   error code), or it can be killed by a signal. So, we use the macros
+   provided with waitpid() to wait until either the processes are exited
+   or killed. Then, the function finally returns a 1, as a signal to the
+   calling function that we should prompt for input again.
+
+Shell Builtins
+
+   You may have noticed that the lsh_loop() function calls lsh_execute(),
+   but above, we titled our function lsh_launch(). This was intentional!
+   You see, most commands a shell executes are programs, but not all of
+   them. Some of them are built right into the shell.
+
+   The reason is actually pretty simple. If you want to change directory,
+   you need to use the function chdir(). The thing is, the current
+   directory is a property of a process. So, if you wrote a program called
+   cd that changed directory, it would just change its own current
+   directory, and then terminate. Its parent process's current directory
+   would be unchanged. Instead, the shell process itself needs to execute
+   chdir(), so that its own current directory is updated. Then, when it
+   launches child processes, they will inherit that directory too.
+
+   Similarly, if there was a program named exit, it wouldn't be able to
+   exit the shell that called it. That command also needs to be built into
+   the shell. Also, most shells are configured by running configuration
+   scripts, like ~/.bashrc. Those scripts use commands that change the
+   operation of the shell. These commands could only change the shell's
+   operation if they were implemented within the shell process itself.
+
+   So, it makes sense that we need to add some commands to the shell
+   itself. The ones I added to my shell are cd, exit, and help. Here are
+   their function implementations below:
+/*
+  Function Declarations for builtin shell commands:
+ */
+int lsh_cd(char **args);
+int lsh_help(char **args);
+int lsh_exit(char **args);
+
+/*
+  List of builtin commands, followed by their corresponding functions.
+ */
+char *builtin_str[] = {
+  "cd",
+  "help",
+  "exit"
+};
+
+int (*builtin_func[]) (char **) = {
+  &lsh_cd,
+  &lsh_help,
+  &lsh_exit
+};
+
+int lsh_num_builtins() {
+  return sizeof(builtin_str) / sizeof(char *);
+}
+
+/*
+  Builtin function implementations.
+*/
+int lsh_cd(char **args)
+{
+  if (args[1] == NULL) {
+    fprintf(stderr, "lsh: expected argument to \"cd\"\n");
+  } else {
+    if (chdir(args[1]) != 0) {
+      perror("lsh");
+    }
+  }
+  return 1;
+}
+
+int lsh_help(char **args)
+{
+  int i;
+  printf("Stephen Brennan's LSH\n");
+  printf("Type program names and arguments, and hit enter.\n");
+  printf("The following are built in:\n");
+
+  for (i = 0; i < lsh_num_builtins(); i++) {
+    printf("  %s\n", builtin_str[i]);
+  }
+
+  printf("Use the man command for information on other programs.\n");
+  return 1;
+}
+
+int lsh_exit(char **args)
+{
+  return 0;
+}
+
+   There are three parts to this code. The first part contains forward
+   declarations of my functions. A forward declaration is when you declare
+   (but don't define) something, so that you can use its name before you
+   define it. The reason I do this is because lsh_help() uses the array of
+   builtins, and the arrays contain lsh_help(). The cleanest way to break
+   this dependency cycle is by forward declaration.
+
+   The next part is an array of builtin command names, followed by an
+   array of their corresponding functions. This is so that, in the future,
+   builtin commands can be added simply by modifying these arrays, rather
+   than editing a large "switch" statement somewhere in the code. If
+   you're confused by the declaration of builtin_func, that's OK! I am
+   too. It's an array of function pointers (that take array of strings and
+   return an int). Any declaration involving function pointers in C can
+   get really complicated. I still look up how function pointers are
+   declared myself!^[7]1
+
+   Finally, I implement each function. The lsh_cd() function first checks
+   that its second argument exists, and prints an error message if it
+   doesn't. Then, it calls chdir(), checks for errors, and returns. The
+   help function prints a nice message and the names of all the builtins.
+   And the exit function returns 0, as a signal for the command loop to
+   terminate.
+
+Putting together builtins and processes
+
+   The last missing piece of the puzzle is to implement lsh_execute(), the
+   function that will either launch a builtin, or a process. If you're
+   reading this far, you'll know that we've set ourselves up for a really
+   simple function:
+int lsh_execute(char **args)
+{
+  int i;
+
+  if (args[0] == NULL) {
+    // An empty command was entered.
+    return 1;
+  }
+
+  for (i = 0; i < lsh_num_builtins(); i++) {
+    if (strcmp(args[0], builtin_str[i]) == 0) {
+      return (*builtin_func[i])(args);
+    }
+  }
+
+  return lsh_launch(args);
+}
+
+   All this does is check if the command equals each builtin, and if so,
+   run it. If it doesn't match a builtin, it calls lsh_launch() to launch
+   the process. The one caveat is that args might just contain NULL, if
+   the user entered an empty string, or just whitespace. So, we need to
+   check for that case at the beginning.
+
+Putting it all together
+
+   That's all the code that goes into the shell. If you've read along, you
+   should understand completely how the shell works. To try it out (on a
+   Linux machine), you would need to copy these code segments into a file
+   (main.c), and compile it. Make sure to only include one implementation
+   of lsh_read_line(). You'll need to include the following headers at the
+   top. I've added notes so that you know where each function comes from.
+     * #include <sys/wait.h>
+          + waitpid() and associated macros
+     * #include <unistd.h>
+          + chdir()
+          + fork()
+          + exec()
+          + pid_t
+     * #include <stdlib.h>
+          + malloc()
+          + realloc()
+          + free()
+          + exit()
+          + execvp()
+          + EXIT_SUCCESS, EXIT_FAILURE
+     * #include <stdio.h>
+          + fprintf()
+          + printf()
+          + stderr
+          + getchar()
+          + perror()
+     * #include <string.h>
+          + strcmp()
+          + strtok()
+
+   Once you have the code and headers, it should be as simple as running
+   gcc -o main main.c to compile it, and then ./main to run it.
+
+   Alternatively, you can get the code from [8]GitHub. That link goes
+   straight to the current revision of the code at the time of this
+   writing- I may choose to update it and add new features someday in the
+   future. If I do, I'll try my best to update this article with the
+   details and implementation ideas.
+
+Wrap up
+
+   If you read this and wondered how in the world I knew how to use those
+   system calls, the answer is simple: man pages. In man 3p there is
+   thorough documentation on every system call. If you know what you're
+   looking for, and you just want to know how to use it, the man pages are
+   your best friend. If you don't know what sort of interface the C
+   library and Unix offer you, I would point you toward the [9]POSIX
+   Specification, specifically Section 13, "Headers". You can find each
+   header and everything it is required to define in there.
+
+   Obviously, this shell isn't feature-rich. Some of its more glaring
+   omissions are:
+     * Only whitespace separating arguments, no quoting or backslash
+       escaping.
+     * No piping or redirection.
+     * Few standard builtins.
+     * No globbing.
+
+   The implementation of all of this stuff is really interesting, but way
+   more than I could ever fit into an article like this. If I ever get
+   around to implementing any of them, I'll be sure to write a follow-up
+   about it. But I'd encourage any reader to try implementing this stuff
+   yourself. If you're met with success, drop me a line in the comments
+   below, I'd love to see the code.
+
+   And finally, thanks for reading this tutorial (if anyone did). I
+   enjoyed writing it, and I hope you enjoyed reading it. Let me know what
+   you think in the comments!
+
+   Edit: In an earlier version of this article, I had a couple nasty bugs
+   in lsh_split_line(), that just happened to cancel each other out.
+   Thanks to /u/munmap on Reddit (and other commenters) for catching them!
+   Check [10]this diff to see exactly what I did wrong.
+
+   Edit 2: Thanks to user ghswa on GitHub for contributing some null
+   checks for malloc() that I forgot. He/she also pointed out that the
+   [11]manpage for getline() specifies that the first argument should be
+   freeable, so line should be initialized to NULL in my lsh_read_line()
+   implementation that uses getline().
+
+   Edit 3: It's 2020 and we're still finding bugs, this is why software is
+   hard. Credit to [12]harishankarv on Github, for finding an issue with
+   my "simple" implementation of lsh_read_line() that depends on
+   getline(). See [13]this issue for details - the text of the blog is
+   updated.
+
+Footnotes
+
+    1. Edit 4/Footnote: It's 2021, over 6.5 years since writing this
+       tutorial. I now work on operating systems in C for a living. I just
+       wanted to say that I still do not remember how to declare a
+       function pointer. I still need to Google it every time. [14]↩
+
+     * Share on:
+     *
+     *
+     *
+     *
+     __________________________________________________________________
+
+   [15]Legal o [16]RSS
+   [17]Creative Commons License
+   Stephen Brennan's Blog is licensed under a [18]Creative Commons
+   Attribution-ShareAlike 4.0 International License
+
+References
+
+   Visible links:
+   1. https://brennan.io/
+   2. https://brennan.io/blog
+   3. https://brennan.io/projects
+   4. https://brennan.io/resume
+   5. https://github.com/brenns10/lsh
+   6. https://brennan.io/2016/03/29/dishonesty/
+   7. https://brennan.io/2015/01/16/write-a-shell-in-c/#fn:1
+   8. https://github.com/brenns10/lsh/tree/407938170e8b40d231781576e05282a41634848c
+   9. http://pubs.opengroup.org/onlinepubs/9699919799/
+  10. https://github.com/brenns10/lsh/commit/486ec6dcdd1e11c6dc82f482acda49ed18be11b5
+  11. http://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html
+  12. https://github.com/harishankarv
+  13. https://github.com/brenns10/lsh/issues/14
+  14. https://brennan.io/2015/01/16/write-a-shell-in-c/#fnref:1
+  15. https://brennan.io/legal
+  16. https://brennan.io/blog/rss.xml
+  17. http://creativecommons.org/licenses/by-sa/4.0/
+  18. http://creativecommons.org/licenses/by-sa/4.0/
+
+   Hidden links:
+  20. https://www.facebook.com/sharer/sharer.php?u=https://brennan.io/2015/01/16/write-a-shell-in-c/
+  21. http://twitter.com/share?text=Tutorial%20-%20Write%20a%20Shell%20in%20C&url=https://brennan.io/2015/01/16/write-a-shell-in-c/
+  22. http://news.ycombinator.com/submitlink?u=https://brennan.io/2015/01/16/write-a-shell-in-c/&t=Tutorial%20-%20Write%20a%20Shell%20in%20C
+  23. https://www.reddit.com/submit?title=Tutorial%20-%20Write%20a%20Shell%20in%20C&url=https://brennan.io/2015/01/16/write-a-shell-in-c/
diff --git a/doc/developer_ibm_com_articles_l_gas_nasm.txt b/doc/developer_ibm_com_articles_l_gas_nasm.txt
new file mode 100644
index 0000000..8d0850d
--- /dev/null
+++ b/doc/developer_ibm_com_articles_l_gas_nasm.txt
@@ -0,0 +1,544 @@
+Linux assemblers: A comparison of GAS and NASM
+A side-by-side look at GNU Assembler (GAS) and Netwide Assembler (NASM)
+ Save
+ Like
+By Ram Narayan
+Published October 17, 2007
+
+Introduction
+Unlike other languages, assembly programming involves understanding the 
+processor architecture of the machine that is being programmed. Assembly 
+programs are not at all portable and are often cumbersome to maintain and 
+understand, and can often contain a large number of lines of code. But with 
+these limitations comes the advantage of speed and size of the runtime binary 
+that executes on that machine.
+
+Though much information is already available on assembly level programming on 
+Linux, this article aims to more specifically show the differences between 
+syntaxes in a way that will help you more easily convert from one flavor of 
+assembly to another. The article evolved from my own quest to improve at 
+this conversion.
+
+This article uses a series of program examples. Each program illustrates some 
+feature and is followed by a discussion and comparison of the syntaxes. 
+Although it’s not possible to cover every difference that exists between 
+NASM and GAS, I do try to cover the main points and provide a foundation for 
+further investigation. And for those already familiar with both NASM and GAS, 
+you might still find something useful here, such as macros.
+
+This article assumes you have at least a basic understanding of assembly 
+terminology and have programmed with an assembler using Intel® syntax, 
+perhaps using NASM on Linux or Windows. This article does not teach how to 
+type code into an editor or how to assemble and link. You should be familiar 
+with the Linux operating system (any Linux distribution will do; I used Red 
+Hat and Slackware) and basic GNU tools such as gcc and ld, and you should be 
+programming on an x86 machine.
+
+Now I’ll describe what this article does and does not cover.
+
+Building the examples
+
+Assembling:
+GAS:
+as -o program.o program.s
+
+NASM:
+nasm -f elf -o program.o program.asm
+
+Linking (common to both kinds of assembler):
+ld -o program program.o
+
+Linking when an external C library is to be used:
+ld --dynamic-linker /lib/ld-linux.so.2 -lc -o program program.o
+
+This article covers:
+
+Basic syntactical differences between NASM and GAS
+Common assembly level constructs such as variables, loops, labels, and macros
+A bit about calling external C routines and using functions
+Assembly mnemonic differences and usage
+Memory addressing methods
+This article does not cover:
+
+The processor instruction set
+Various forms of macros and other constructs particular to an assembler
+Assembler directives peculiar to either NASM or GAS
+Features that are not commonly used or are found only in one assembler but not 
+in the other
+For more information, refer to the official assembler manuals (see the 
+Resources section on the right for links), as those are the most complete 
+sources of information.
+
+Basic structure
+Listing 1 shows a very simple program that simply exits with an exit code of 
+2. This little program describes the basic structure of an assembly program 
+for both GAS and NASM.
+
+Line	NASM	GAS
+001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016	; Text segment 
+begins section .text global _start ; Program entry point _start: ; Put the 
+code number for system call mov eax, 1 ; Return value mov ebx, 2 ; Call the OS 
+int 80h	# Text segment begins .section .text .globl _start # Program entry 
+point _start: # Put the code number for system call movl $1, %eax /* Return 
+value */ movl $2, %ebx # Call the OS int $0x80
+Listing 1. A program that exits with an exit code of 2
+
+Now for a bit of explanation.
+
+One of the biggest differences between NASM and GAS is the syntax. GAS uses 
+the AT&T syntax, a relatively archaic syntax that is specific to GAS and some 
+older assemblers, whereas NASM uses the Intel syntax, supported by a majority 
+of assemblers such as TASM and MASM. (Modern versions of GAS do support a 
+directive called .intel_syntax, which allows the use of Intel syntax with GAS.)
+
+The following are some of the major differences summarized from the GAS manual:
+
+AT&T and Intel syntax use the opposite order for source and destination 
+operands. For example:
+
+Intel: mov eax, 4
+AT&T: movl $4, %eax
+In AT&T syntax, immediate operands are preceded by $; in Intel syntax, 
+immediate operands are not. For example:
+
+Intel: push 4
+AT&T: pushl $4
+In AT&T syntax, register operands are preceded by %; in Intel syntax, they are 
+not.
+In AT&T syntax, the size of memory operands is determined from the last 
+character of the opcode name. Opcode suffixes of b, w, and l specify byte 
+(8-bit), word (16-bit), and long (32-bit) memory references. Intel syntax 
+accomplishes this by prefixing memory operands (not the opcodes themselves) 
+with byte ptr, word ptr, and dword ptr. Thus:
+
+Intel: mov al, byte ptr foo
+AT&T: movb foo, %al
+Immediate form long jumps and calls are lcall/ljmp $section, $offset in AT&T 
+syntax; the Intel syntax is call/jmp far section:offset. The far return 
+instruction is lret $stack-adjust in AT&T syntax, whereas Intel uses ret far 
+stack-adjust.
+In both the assemblers, the names of registers remain the same, but the syntax 
+for using them is different as is the syntax for addressing modes. In 
+addition, assembler directives in GAS begin with a “.”, but not in NASM.
+
+The .text section is where the processor begins code execution. The global 
+(also .globl or .global in GAS) keyword is used to make a symbol visible to 
+the linker and available to other linking object modules. On the NASM side of 
+Listing 1, global _start marks the symbol _start as a visible identifier so 
+the linker knows where to jump into the program and begin execution. As with 
+NASM, GAS looks for this _start label as the default entry point of a program. 
+A label always ends with a colon in both GAS and NASM.
+
+Interrupts are a way to inform the OS that its services are required. The int 
+instruction in line 16 does this job in our program. Both GAS and NASM use the 
+same mnemonic for interrupts. GAS uses the 0x prefix to specify a hex number, 
+whereas NASM uses the h suffix. Because immediate operands are prefixed with $ 
+in GAS, 80 hex is $0x80.
+
+int $0x80 (or 80h in NASM) is used to invoke Linux and request a service. The 
+service code is present in the EAX register. A value of 1 (for the Linux exit 
+system call) is stored in EAX to request that the program exit. Register EBX 
+contains the exit code (2, in our case), a number that is returned to the OS. 
+(You can track this number by typing echo $? at the command prompt.)
+
+Finally, a word about comments. GAS supports C style (/* */), C++ style 
+(//), and shell style (#) comments. NASM supports single-line comments that 
+begin with the “;” character.
+
+Variables and accessing memory
+This section begins with an example program that finds the largest of three 
+numbers.
+
+Line	NASM	GAS
+001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 
+020 021 022 023 024 025 026 027 028 029 030 031	; Data section begins section 
+.data var1 dd 40 var2 dd 20 var3 dd 30 section .text global _start _start: ; 
+Move the contents of variables mov ecx, [var1] cmp ecx, [var2] jg 
+check_third_var mov ecx, [var2] check_third_var: cmp ecx, [var3] jg _exit mov 
+ecx, [var3] _exit: mov eax, 1 mov ebx, ecx int 80h	// Data section begins 
+.section .data var1: .int 40 var2: .int 20 var3: .int 30 .section .text .globl 
+_start _start: # move the contents of variables movl (var1), %ecx cmpl (var2), 
+%ecx jg check_third_var movl (var2), %ecx check_third_var: cmpl (var3), %ecx 
+jg _exit movl (var3), %ecx _exit: movl $1, %eax movl %ecx, %ebx int $0x80
+Listing 2. A program that finds the maximum of three numbers
+
+You can see several differences above in the declaration of memory variables. 
+NASM uses the dd, dw, and db directives to declare 32-, 16-, and 8-bit 
+numbers, respectively, whereas GAS uses .long (or .int), .short (or .word), 
+and .byte for the same purpose. GAS has other directives too, such as 
+.ascii, .asciz, and .string. In GAS, you declare variables just like other 
+labels (using a colon), but in NASM you simply type a variable name (without 
+the colon) before the memory allocation directive (dd, dw, etc.), followed by 
+the value of the variable.
+
+Line 18 in Listing 2 illustrates the memory indirect addressing mode. NASM 
+uses square brackets to dereference the value at the address pointed to by a 
+memory location: [var1]. GAS uses parentheses to dereference the same 
+value: (var1). The use of other addressing modes is covered later in this 
+article.
+
+Using macros
+Listing 3 illustrates the concepts of this section; it accepts the user’s 
+name as input and returns a greeting.
+
+Line	NASM	GAS
+001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 
+020 021 022 023 024 025 026 027 028 029 030 031 032 033 034 035 036 037 038 
+039 040 041 042 043 044 045 046 047 048 049 050 051 052 053 054 055 056 057 
+058 059 060 061 062	section .data prompt_str db 'Enter your name: ' ; $ is 
+the location counter STR_SIZE equ $ - prompt_str greet_str db 'Hello ' 
+GSTR_SIZE equ $ - greet_str section .bss ; Reserve 32 bytes of memory buff 
+resb 32 ; A macro with two parameters ; Implements the write system call 
+%macro write 2 mov eax, 4 mov ebx, 1 mov ecx, %1 mov edx, %2 int 80h %endmacro 
+; Implements the read system call %macro read 2 mov eax, 3 mov ebx, 0 mov ecx, 
+%1 mov edx, %2 int 80h %endmacro section .text global _start _start: write 
+prompt_str, STR_SIZE read buff, 32 ; Read returns the length in eax push eax ; 
+Print the hello text write greet_str, GSTR_SIZE pop edx ; edx = length 
+returned by read write buff, edx _exit: mov eax, 1 mov ebx, 0 int 80h	
+.section .data prompt_str: .ascii "Enter Your Name: " pstr_end: .set STR_SIZE, 
+pstr_end - prompt_str greet_str: .ascii "Hello " gstr_end: .set GSTR_SIZE, 
+gstr_end - greet_str .section .bss // Reserve 32 bytes of memory .lcomm buff, 
+32 // A macro with two parameters // implements the write system call .macro 
+write str, str_size movl $4, %eax movl $1, %ebx movl \str, %ecx movl 
+\str_size, %edx int $0x80 .endm // Implements the read system call .macro read 
+buff, buff_size movl $3, %eax movl $0, %ebx movl \buff, %ecx movl \buff_size, 
+%edx int $0x80 .endm .section .text .globl _start _start: write $prompt_str, 
+$STR_SIZE read $buff, $32 // Read returns the length in eax pushl %eax // 
+Print the hello text write $greet_str, $GSTR_SIZE popl %edx // edx = length 
+returned by read write $buff, %edx _exit: movl $1, %eax movl $0, %ebx int $0x80
+Listing 3. A program to read a string and display a greeting to the user
+
+The heading for this section promises a discussion of macros, and both NASM 
+and GAS certainly support them. But before we get into macros, a few other 
+features are worth comparing.
+
+Listing 3 illustrates the concept of uninitialized memory, defined using the 
+.bss section directive (line 14). BSS stands for “block storage segment” 
+(originally, “block started by symbol”), and the memory reserved in the 
+BSS section is initialized to zero during the start of the program. Objects in 
+the BSS section have only a name and a size, and no value. Variables declared 
+in the BSS section don’t actually take space, unlike in the data segment.
+
+NASM uses the resb, resw, and resd keywords to allocate byte, word, and dword 
+space in the BSS section. GAS, on the other hand, uses the .lcomm keyword to 
+allocate byte-level space. Notice the way the variable name is declared in 
+both versions of the program. In NASM the variable name precedes the resb (or 
+resw or resd) keyword, followed by the amount of space to be reserved, whereas 
+in GAS the variable name follows the .lcomm keyword, which is then followed by 
+a comma and then the amount of space to be reserved. This shows the difference:
+
+NASM: varname resb size
+
+GAS: .lcomm varname, size
+
+Listing 3 also introduces the concept of a location counter (line 6). NASM 
+provides a special variable (the $ and $$ variables) to manipulate the 
+location counter. In GAS, there is no method to manipulate the location 
+counter and you have to use labels to calculate the next storage location 
+(data, instruction, etc.).
+
+For example, to calculate the length of a string, you would use the following 
+idiom in NASM:
+
+prompt_str db 'Enter your name: ' STR_SIZE equ $ - prompt_str ; $ is the 
+location counter
+
+The $ gives the current value of the location counter, and subtracting the 
+value of the label (all variable names are labels) from this location counter 
+gives the number of bytes present between the declaration of the label and the 
+current location. The equ directive is used to set the value of the variable 
+STR_SIZE to the expression following it. A similar idiom in GAS looks like 
+this:
+
+prompt_str: .ascii "Enter Your Name: " pstr_end: .set STR_SIZE, pstr_end - 
+prompt_str
+
+The end label (pstr_end) gives the next location address, and subtracting the 
+starting label address gives the size. Also note the use of .set to initialize 
+the value of the variable STR_SIZE to the expression following the comma. A 
+corresponding .equ can also be used. There is no alternative to GAS’s set 
+directive in NASM.
+
+As I mentioned, Listing 3 uses macros (line 21). Different macro techniques 
+exist in NASM and GAS, including single-line macros and macro overloading, but 
+I only deal with the basic type here. A common use of macros in assembly is 
+clarity. Instead of typing the same piece of code again and again, you can 
+create reusable macros that both avoid this repetition and enhance the look 
+and readability of the code by reducing clutter.
+
+NASM users might be familiar with declaring macros using the %macro 
+directive and ending them with an %endmacro directive. A %macro directive 
+is followed by the macro name. After the macro name comes a count, the number 
+of macro arguments the macro is supposed to have. In NASM, macro arguments are 
+numbered sequentially starting with 1. That is, the first argument to a macro 
+is %1, the second is %2, the third is %3, and so on. For example:
+
+%macro macroname 2 mov eax, %1 mov ebx, %2 %endmacro
+
+This creates a macro with two arguments, the first being %1 and the second 
+being %2. Thus, a call to the above macro would look something like this:
+
+macroname 5, 6
+
+Macros can also be created without arguments, in which case they don’t 
+specify any number.
+
+Now let’s take a look at how GAS uses macros. GAS provides the .macro and 
+.endm directives to create macros. A .macro directive is followed by a macro 
+name, which may or may not have arguments. In GAS, macro arguments are given 
+by name. For example:
+
+.macro macroname arg1, arg2 movl \arg1, %eax movl \arg2, %ebx .endm
+
+A backslash precedes the name of each argument of the macro when the name is 
+actually used inside a macro. If this is not done, the linker would treat the 
+names as labels rather than as arguments and would report an error.
+
+Functions, external routines, and the stack
+The example program for this section implements a selection sort on an array 
+of integers.
+
+Line	NASM	GAS
+001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 
+020 021 022 023 024 025 026 027 028 029 030 031 032 033 034 035 036 037 038 
+039 040 041 042 043 044 045 046 047 048 049 050 051 052 053 054 055 056 057 
+058 059 060 061 062 063 064 065 066 067 068 069 070 071 072 073 074 075 076 
+077 078 079 080 081 082 083 084 085 086 087 088 089 090 091 092 093 094 095 
+096 097 098 099 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 
+115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 
+134 135 136 137 138 139 140 141 142 143 144 145	section .data array db 89, 10, 
+67, 1, 4, 27, 12, 34, 86, 3 ARRAY_SIZE equ $ - array array_fmt db " %d", 0 
+usort_str db "unsorted array:", 0 sort_str db "sorted array:", 0 newline db 
+10, 0 section .text extern puts global _start _start: push usort_str call puts 
+add esp, 4 push ARRAY_SIZE push array push array_fmt call print_array10 add 
+esp, 12 push ARRAY_SIZE push array call sort_routine20 ; Adjust the stack 
+pointer add esp, 8 push sort_str call puts add esp, 4 push ARRAY_SIZE push 
+array push array_fmt call print_array10 add esp, 12 jmp _exit extern printf 
+print_array10: push ebp mov ebp, esp sub esp, 4 mov edx, [ebp + 8] mov ebx, 
+[ebp + 12] mov ecx, [ebp + 16] mov esi, 0 push_loop: mov [ebp - 4], ecx mov 
+edx, [ebp + 8] xor eax, eax mov al, byte [ebx + esi] push eax push edx call 
+printf add esp, 8 mov ecx, [ebp - 4] inc esi loop push_loop push newline call 
+printf add esp, 4 mov esp, ebp pop ebp ret sort_routine20: push ebp mov ebp, 
+esp ; Allocate a word of space in stack sub esp, 4 ; Get the address of the 
+array mov ebx, [ebp + 8] ; Store array size mov ecx, [ebp + 12] dec ecx ; 
+Prepare for outer loop here xor esi, esi outer_loop: ; This stores the min 
+index mov [ebp - 4], esi mov edi, esi inc edi inner_loop: cmp edi, ARRAY_SIZE 
+jge swap_vars xor al, al mov edx, [ebp - 4] mov al, byte [ebx + edx] cmp byte 
+[ebx + edi], al jge check_next mov [ebp - 4], edi check_next: inc edi jmp 
+inner_loop swap_vars: mov edi, [ebp - 4] mov dl, byte [ebx + edi] mov al, byte 
+[ebx + esi] mov byte [ebx + esi], dl mov byte [ebx + edi], al inc esi loop 
+outer_loop mov esp, ebp pop ebp ret _exit: mov eax, 1 mov ebx, 0 int 
+80h	.section .data array: .byte 89, 10, 67, 1, 4, 27, 12, 34, 86, 3 
+array_end: .equ ARRAY_SIZE, array_end - array array_fmt: .asciz " %d" 
+usort_str: .asciz "unsorted array:" sort_str: .asciz "sorted array:" newline: 
+.asciz "\n" .section .text .globl _start _start: pushl $usort_str call puts 
+addl $4, %esp pushl $ARRAY_SIZE pushl $array pushl $array_fmt call 
+print_array10 addl $12, %esp pushl $ARRAY_SIZE pushl $array call 
+sort_routine20 # Adjust the stack pointer addl $8, %esp pushl $sort_str call 
+puts addl $4, %esp pushl $ARRAY_SIZE pushl $array pushl $array_fmt call 
+print_array10 addl $12, %esp jmp _exit print_array10: pushl %ebp movl %esp, 
+%ebp subl $4, %esp movl 8(%ebp), %edx movl 12(%ebp), %ebx movl 16(%ebp), %ecx 
+movl $0, %esi push_loop: movl %ecx, -4(%ebp) movl 8(%ebp), %edx xorl %eax, 
+%eax movb (%ebx, %esi, 1), %al pushl %eax pushl %edx call printf addl $8, %esp 
+movl -4(%ebp), %ecx incl %esi loop push_loop pushl $newline call printf addl 
+$4, %esp movl %ebp, %esp popl %ebp ret sort_routine20: pushl %ebp movl %esp, 
+%ebp # Allocate a word of space in stack subl $4, %esp # Get the address of 
+the array movl 8(%ebp), %ebx # Store array size movl 12(%ebp), %ecx decl %ecx 
+# Prepare for outer loop here xorl %esi, %esi outer_loop: # This stores the 
+min index movl %esi, -4(%ebp) movl %esi, %edi incl %edi inner_loop: cmpl 
+$ARRAY_SIZE, %edi jge swap_vars xorb %al, %al movl -4(%ebp), %edx movb (%ebx, 
+%edx, 1), %al cmpb %al, (%ebx, %edi, 1) jge check_next movl %edi, -4(%ebp) 
+check_next: incl %edi jmp inner_loop swap_vars: movl -4(%ebp), %edi movb 
+(%ebx, %edi, 1), %dl movb (%ebx, %esi, 1), %al movb %dl, (%ebx, %esi, 1) movb 
+%al, (%ebx, %edi, 1) incl %esi loop outer_loop movl %ebp, %esp popl %ebp ret 
+_exit: movl $1, %eax movl $0, %ebx int $0x80
+Listing 4. Implementation of selection sort on an integer array
+
+Listing 4 might look overwhelming at first, but in fact it’s very simple. 
+The listing introduces the concept of functions, various memory addressing 
+schemes, the stack and the use of a library function. The program sorts an 
+array of 10 numbers and uses the external C library functions puts and printf 
+to print out the entire contents of the unsorted and sorted array. For 
+modularity and to introduce the concept of functions, the sort routine itself 
+is implemented as a separate procedure along with the array print routine. 
+Let’s deal with them one by one.
+
+After the data declarations, the program execution begins with a call to puts 
+(line 31). The puts function displays a string on the console. Its only 
+argument is the address of the string to be displayed, which is passed on to 
+it by pushing the address of the string in the stack (line 30).
+
+In NASM, any label that is not part of our program and needs to be resolved 
+during link time must be predefined, which is the function of the extern 
+keyword (line 24). GAS doesn’t have such requirements. After this, the 
+address of the string usort_str is pushed onto the stack (line 30). In NASM, a 
+memory variable such as usort_str represents the address of the memory 
+location itself, and thus a call such as push usort_str actually pushes the 
+address on top of the stack. In GAS, on the other hand, the variable usort_str 
+must be prefixed with $, so that it is treated as an immediate address. If 
+it’s not prefixed with $, the actual bytes represented by the memory 
+variable are pushed onto the stack instead of the address.
+
+Since pushing a variable essentially moves the stack pointer by a dword, the 
+stack pointer is adjusted by adding 4 (the size of a dword) to it (line 32).
+
+Three arguments are now pushed onto the stack, and the print_array10 function 
+is called (line 37). Functions are declared the same way in both NASM and GAS. 
+They are nothing but labels, which are invoked using the call instruction.
+
+After a function call, ESP represents the top of the stack. A value of esp + 4 
+represents the return address, and a value of esp + 8 represents the first 
+argument to the function. All subsequent arguments are accessed by adding the 
+size of a dword variable to the stack pointer (that is, esp + 12, esp + 16, 
+and so on).
+
+Once inside a function, a local stack frame is created by copying esp to ebp 
+(line 62). You can also allocate space for local variables as is done in the 
+program (line 63). You do this by subtracting the number of bytes required 
+from esp. A value of esp - 4 represents a space of 4 bytes allocated for a 
+local variable, and this can continue as long as there is enough space in the 
+stack to accommodate your local variables.
+
+Listing 4 illustrates the base indirect addressing mode (line 64), so called 
+because you start with a base address and add an offset to it to arrive at a 
+final address. On the NASM side of the listing, [ebp + 8] is one such example, 
+as is [ebp - 4] (line 71). In GAS, the addressing is a bit more terse: 
+8(%ebp) and -4(%ebp), respectively.
+
+In the print_array10 routine, you can see another kind of addressing mode 
+being used after the push_loop label (line 74). The line is represented in 
+NASM and GAS, respectively, like so:
+
+NASM: mov al, byte [ebx + esi]
+
+GAS: movb (%ebx, %esi, 1), %al
+
+This addressing mode is the base indexed addressing mode. Here, there are 
+three entities: one is the base address, the second is the index register, and 
+the third is the multiplier. Because it’s not possible to determine the 
+number of bytes to be accessed from a memory location, a method is needed to 
+find out the amount of memory addressed. NASM uses the byte operator to tell 
+the assembler that a byte of data is to be moved. In GAS the same problem is 
+solved by using a multiplier as well as using the b, w, or l suffix in the 
+mnemonic (for example, movb). The syntax of GAS can seem somewhat complex when 
+first encountered.
+
+The general form of base indexed addressing in GAS is as follows:
+
+%segment:ADDRESS (, index, multiplier)
+
+or
+
+%segment:(offset, index, multiplier)
+
+or
+
+%segment:ADDRESS(base, index, multiplier)
+
+The final address is calculated using this formula:
+
+ADDRESS or offset + base + index * multiplier.
+
+Thus, to access a byte, a multiplier of 1 is used, for a word, 2, and for a 
+dword, 4. Of course, NASM uses a simpler syntax. Thus, the above in NASM would 
+be represented like so:
+
+Segment:[ADDRESS or offset + index * multiplier]
+
+A prefix of byte, word, or dword is used before this memory address to access 
+1, 2, or 4 bytes of memory, respectively.
+
+Leftovers
+Line	NASM	GAS
+001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 
+020 021 022 023 024 025 026 027 028 029 030 031 032 033 034 035 036 037 038 
+039 040 041 042 043 044 045 046 047 048 049 050 051 052 053 054 055 056 057 
+058 059 060 061	section .data ; Command table to store at most ; 10 command 
+line arguments cmd_tbl: %rep 10 dd 0 %endrep section .text global _start 
+_start: ; Set up the stack frame mov ebp, esp ; Top of stack contains the ; 
+number of command line arguments. ; The default value is 1 mov ecx, [ebp] ; 
+Exit if arguments are more than 10 cmp ecx, 10 jg _exit mov esi, 1 mov edi, 0 
+; Store the command line arguments ; in the command table store_loop: mov eax, 
+[ebp + esi * 4] mov [cmd_tbl + edi * 4], eax inc esi inc edi loop store_loop 
+mov ecx, edi mov esi, 0 extern puts print_loop: ; Make some local space sub 
+esp, 4 ; puts function corrupts ecx mov [ebp - 4], ecx mov eax, [cmd_tbl + esi 
+* 4] push eax call puts add esp, 4 mov ecx, [ebp - 4] inc esi loop print_loop 
+jmp _exit _exit: mov eax, 1 mov ebx, 0 int 80h	.section .data // Command 
+table to store at most // 10 command line arguments cmd_tbl: .rept 10 .long 0 
+.endr .section .text .globl _start _start: // Set up the stack frame movl 
+%esp, %ebp // Top of stack contains the // number of command line arguments. 
+// The default value is 1 movl (%ebp), %ecx // Exit if arguments are more than 
+10 cmpl $10, %ecx jg _exit movl $1, %esi movl $0, %edi // Store the command 
+line arguments // in the command table store_loop: movl (%ebp, %esi, 4), %eax 
+movl %eax, cmd_tbl( , %edi, 4) incl %esi incl %edi loop store_loop movl %edi, 
+%ecx movl $0, %esi print_loop: // Make some local space subl $4, %esp // puts 
+functions corrupts ecx movl %ecx, -4(%ebp) movl cmd_tbl( , %esi, 4), %eax 
+pushl %eax call puts addl $4, %esp movl -4(%ebp), %ecx incl %esi loop 
+print_loop jmp _exit _exit: movl $1, %eax movl $0, %ebx int $0x80
+Listing 5. A program that reads command line arguments, stores them in memory, 
+and prints them
+
+Listing 5 shows a construct that repeats instructions in assembly. Naturally 
+enough, it’s called the repeat construct. In GAS, the repeat construct is 
+started using the .rept directive (line 6). This directive has to be closed 
+using an .endr directive (line 8). .rept is followed by a count in GAS that 
+specifies the number of times the expression enclosed inside the .rept/.endr 
+construct is to be repeated. Any instruction placed inside this construct is 
+equivalent to writing that instruction count number of times, each on a 
+separate line.
+
+For example, for a count of 3:
+
+.rept 3 movl $2, %eax .endr
+
+This is equivalent to:
+
+movl $2, %eax movl $2, %eax movl $2, %eax
+
+In NASM, a similar construct is used at the preprocessor level. It begins with 
+the %rep directive and ends with %endrep. The %rep directive is followed by an 
+expression (unlike in GAS where the .rept directive is followed by a count):
+
+%rep <expression> nop %endrep
+
+There is also an alternative in NASM, the times directive. Unlike %rep, it 
+works at the assembler level, but it, too, is followed by an expression. For 
+example, the above %rep construct is equivalent to this:
+
+times <expression> nop
+
+And this:
+
+%rep 3 mov eax, 2 %endrep
+
+is equivalent to this:
+
+times 3 mov eax, 2
+
+and both are equivalent to this:
+
+mov eax, 2 mov eax, 2 mov eax, 2
+
+In Listing 5, the .rept (or %rep) directive is used to create a memory data 
+area for 10 double words. The command line arguments are then accessed one by 
+one from the stack and stored in the memory area until the command table gets 
+full.
+
+As for command line arguments, they are accessed similarly with both 
+assemblers. ESP or the top of the stack stores the number of command line 
+arguments supplied to a program, which is 1 by default (for no command line 
+arguments). esp + 4 stores the first command line argument, which is always 
+the name of the program that was invoked from the command line. esp + 8, esp + 
+12, and so on store subsequent command line arguments.
+
+Also watch the way the memory command table is being accessed on both sides in 
+Listing 5. Here, memory indirect addressing mode (line 33) is used to access 
+the command table along with an offset in ESI (and EDI) and a multiplier. 
+Thus, [cmd_tbl + esi * 4] in NASM is equal to cmd_tbl(, %esi, 4) in GAS.
+
+Conclusion
+Even though the differences between these two assemblers are substantial, 
+it’s not that difficult to convert from one form to another. You might find 
+that the AT&T syntax seems at first difficult to understand, but once 
+mastered, it’s as simple as the Intel syntax.
+
diff --git a/doc/forum.osdev.org_viewtopic.php_f=2_t=33362.txt b/doc/forum.osdev.org_viewtopic.php_f=2_t=33362.txt
new file mode 100644
index 0000000..505d05b
--- /dev/null
+++ b/doc/forum.osdev.org_viewtopic.php_f=2_t=33362.txt
@@ -0,0 +1,1015 @@
+   #[1]Feed - OSDev.org [2]Feed - News [3]Feed - All forums [4]Feed - New
+   Topics [5]Feed - Active Topics [6]Feed - Forum - Announcements, Test
+   Requests, & Job openings [7]Feed - Topic - BOOTBOOT Multi-platform
+   Micro-kernel Loader
+
+OSDev.org
+
+   The Place to Start for Operating System Developers
+   [8]* Login   [9] * Register [10]The OSDev.org Wiki - Got a question?
+   Search this first! [11]* FAQ    [12] * Search
+   It is currently Sat Jan 13, 2024 5:46 pm
+
+   [13]View unanswered posts | [14]View active topics
+
+   [15]Board index » [16]Operating System Development » [17]Announcements,
+   Test Requests, & Job openings
+
+   All times are UTC - 6 hours
+
+[18]BOOTBOOT Multi-platform Micro-kernel Loader
+
+   Moderators: [19]AJ, [20]01000101, [21]carbonBased, [22]Candy,
+   [23]pcmattman, [24]JAAman, [25]Octocontrabass, [26]klange, [27]sortie,
+   [28]kmcguire, [29]thepowersgang, [30]chase, [31]Combuster, [32]Owen
+[33]Post new topic  [34] Reply to topic  Page 1 of 3
+ [ 39 posts ]                           [35]Go to page 1, [36]2, [37]3  [38]Next
+     [39]Print view [40]Previous topic | [41]Next topic
+   Author Message
+   bzt
+    Post subject: BOOTBOOT Multi-platform Micro-kernel Loader
+   [42]Post Posted: Fri Dec 07, 2018 7:34 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   [43]BOOTBOOT Protocol
+   Unlike any existing boot loaders, this loader is not one big bloated
+   system. Quite the contrary, it's a set of several independent, very
+   thin implementations, all providing the same C/C++ compatible,
+   higher-half 64 bit environment on their corresponding platforms. By
+   very thin I mean really lightweight, no more than a few kilobytes each:
+   boot.bin (512 bytes), bootboot.bin (11k), bootboot.img (30k),
+   bootboot.efi (93k)
+   It is ideal for hobby OS developers, as it's easy to use and it's very
+   flexible. It's much easier to deal with than GRUB, and also unlike
+   with Multiboot you won't need any long mode Assembly trampoline code
+   with dirty GDT and paging tricks in your kernel.
+   BOOTBOOT can load your higher half, 64 bit-only C/C++ kernel just as-is
+   without any hacks. The repository also contains full documentation in
+   MD and PDF formats, as well as small ANSI C mkboot utilities to help
+   you with the installation. It's Free and Open Source Software,
+   licensed under the terms of MIT license. If you want to use it as a
+   reference for your own boot loader, the PDF documentation has a
+   detailed description on which firmware interfaces have been used, and
+   also the sources are well commented.
+   The BOOTBOOT Protocol is very easy to integrate, as it has a totally
+   architecture agnostic interface. You just specify some object addresses
+   in your [44]linker script, and you're done. The information structure
+   is defined in a [45]C/C++ header file and can be used on all platforms.
+   It contains information such as boot date and time, timezone, frame
+   buffer dimension, platform independent memory map, initrd size,
+   pointers to detected system tables (efi, acpi, mp, smbios etc.). Unlike
+   other protocols, BOOTBOOT was written with forward-compatibility in
+   mind. That is, current implementations support static addresses for
+   those variables (Protocol Level 1), but it states that future versions
+   (Level 2) should read the symbol table of the kernel to find those
+   addresses. Kernels written for the Level 1 loaders will be able to boot
+   with the upcoming Level 2 loaders just out-of-the-box.
+   The loader is capable of loading monolithic kernels, but mainly focuses
+   on micro-kernels with an initrd. It can load the OS from several
+   different sources: from ROM, over serial line, from a GPT partition, or
+   from a file on a FAT16/32 GPT partition (usually ESP. To avoid
+   licensing issues with M$, it's limited to upper case 8+3 filenames).
+   The protocol also allows booting over network with TFTP, although the
+   reference implementations do not use that (not yet :-) ).
+   Current implementations support kernels in ELF64 and PE32+ formats for
+   the AArch64 and x86_64 architectures. As for the initrd, they support
+   - statically linked executables (for monolithic kernels and statically
+   linked micro-kernels like Minix)
+   - cpio (all variants: old hp, newc, and crc too. The latter is used by
+   the Linux kernel btw.)
+   - tar (POSIX ustar, a very beginner friendly format)
+   - SFS (both Brendan's and BenLunt's versions)
+   - James Molloy's initrd (I assume you're already familiar with his
+   tutorial)
+   - FS/Z (my operating system's native file system format)
+   - any archive or file system format, provided your kernel file is
+   contiguous and is the first executable in the initrd.
+   The initrd can be gzip compressed, or optionally encrypted (FS/Z only
+   feature).
+   The repository contains an example [46]hello world kernel that
+   demonstrates how to write strings and boxes on the frame buffer in an
+   architecture independent way using PSF2 fonts. You are free to use that
+   example kernel as a skeleton for starting your own kernel. Kernels
+   written in C++ are also supported, but you have to provide a small code
+   block at _start that calls your constructors (easiest way is to use a
+   linker script to create .text.init or .text.ctors sections or
+   .init_array table).
+   How to install
+   Please note BOOTBOOT is very flexible. What I describe here is just one
+   way, which I believe to be the most common use case for hobby OS
+   developers.
+   1. create a disk image with GPT partitioning table ("dd if=/dev/zero
+   of=myimage.dd bs=1M count=256" and "fdisk myimage.dd")
+   2. set the first partition's type to ESP, and format it with FAT16 or
+   FAT32 (fdisk's type 1, and use "mkfs.vfat -F x")
+   3. mount that partition with a loop device ("sudo mount -o loop,user
+   myimage.dd somedir" or use "losetup"+"mount /dev/loop0")
+   4. create a BOOTBOOT directory there ("mkdir somedir/BOOTBOOT")
+   5. create an INITRD with your kernel in it (for example use "find . |
+   cpio -H hpodc -o | gzip >somedir/BOOTBOOT/INITRD", for monolithic
+   kernels, simply copy your kernel executable as INITRD).
+   6. optionally create a text file named BOOTBOOT/CONFIG (will be parsed
+   by your kernel)
+   7. copy bootboot.bin into that directory as BOOTBOOT/LOADER (I strongly
+   suggest to set the SYSTEM attribute for this file)
+   8. unmount the disk image
+   9. use the x86_64-bios/mkboot.c utility to install a (P)MBR sector on
+   your disk image ("./mkboot myimage.dd")
+   If you want more control, do the steps 1-6, and choose a loader
+   implementation for your platform and desired configuration (you can
+   also use multiple loaders in the same image to create multiplatform
+   bootable images). The documentation has detailed description of all of
+   these scenarios:
+   - Raspberry Pi 3 (AArch64)
+   - UEFI (x86_64)
+   - UEFI for embedded systems (x86_64, in ROM)
+   - GRUB Multiboot (x86_64)
+   - ISOLINUX / LILO / BOOTLIN / etc. (x86_64)
+   - Legacy BIOS boot with any arbitrary boot manager (x86_64, chainloaded
+   from VBR)
+   - Legacy BIOS boot with a single OS on a forward-compatible GPT disk
+   (x86_64, booted from MBR)
+   - Legacy BIOS for embedded systems (x86_64, in ROM)
+   - CDROM, in El Torito "no emulation" mode (x86_64, hybrid GPT/ISO9660
+   image)
+   For more information, read the documentation. Before you post any
+   criticism about this loader, please read the documentation. It's very
+   likely it can do what you think it can't do, as most features and
+   options are not mentioned in this brief introductory post.
+   Cheers,
+   bzt
+   [47]Top
+    [48] Profile
+
+   mihe
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [49]Post Posted: Sat Dec 15, 2018 3:33 am
+
+                                   Offline
+                                    Member
+                                   Member
+
+   Joined: Sun Oct 21, 2018 1:37 pm
+   Posts: 38
+   It looks very interesting, especially because I think I can find some
+   guidance for my project in some points where I am a bit stuck, or not
+   skilled enough to move forward comfortably, and you seem to cover.
+   Thanks for sharing this !
+   [50]Top
+    [51] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [52]Post Posted: Thu Feb 07, 2019 6:24 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi All,
+   I proudly present to you the final release of BOOTBOOT Protocol.
+   I've finished co-processor (SSE, Neon) initialization and SMP support
+   on all platforms. With those the feature set is now complete, meaning
+   from now on I'll only commit bugfixes (if any) and ports for new
+   platforms. Specification is now frozen, no further modification on the
+   specification will take place.
+   Multi Processing was tricky to achieve without an interface, but finally
+   I have decided to use the simplest method: the same kernel is started
+   on all cores, only with different stacks. BIOS platform uses LAPIC IPI
+   + SIPI (up to 256 cores) and PIT for sleeping. UEFI utilizes the newer
+   PI EFI_MP_SERVICES_PROTOCOL (up to 1024 cores). If there's a need, I
+   was thinking about implementing a fallback to the older
+   FirmwareMPService Protocol (but I couldn't test it, so I skipped). And
+   last but not least, on RPi, well SMP is just working out-of-the-box
+   (with fixed 4 cores) :-)
+   The bootboot structure had to be changed a bit, few fields were
+   rearranged and bootboot.numcores came in (up to 65535 cores). Otherwise
+   the structure is the same, 64 bytes architecture independent info, 64
+   bytes platform specific pointers (which already included ACPI and MP
+   pointers), and the rest is the platform independent memory map.
+   Naturally I have updated the PDF documentation as well, and I've
+   created an OSDev wiki page for [53]BOOTBOOT. Happy OS development and I
+   hope my loader will be useful to you too!
+   Cheers,
+   bzt
+   [54]Top
+    [55] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [56]Post Posted: Thu Apr 09, 2020 5:02 am
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi All,
+   Lots and lots (and lots) of testing on real hardware. I'd like to say
+   thanks to all the testers! Testing the multicore support under UEFI was
+   extremely helpful.
+   Some minor modifications had to be made: instead of PIT, the BIOS
+   version now uses the PS/2 port oscillator for delays. Also minor fixes
+   in Multiboot and Linux boot protocol support, they both work perfectly
+   now.
+   On Raspberry Pi, model 3 was working already, but now model 4 support
+   has been added too. Since the firmware code changed, multicore support
+   had to be changed a bit too, but now SMP works with the latest firmware
+   as well.
+   Furthermore, I've added [57]sample bootable images and a very simple
+   image creator tool, that can generate hybrid GPT/ISO9660 images with
+   your initrd. I've also provided a Makefile with several rules for
+   testing (booting the images from ROM, via BIOS, via GRUB, from UEFI
+   CDROM etc. etc. etc.)
+   Cheers,
+   bzt
+   [58]Top
+    [59] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [60]Post Posted: Mon Jun 15, 2020 8:48 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi All,
+   New features (sort of). Protocol level 2 is now implemented in the
+   reference implementations (UEFI and Raspberry Pi, BIOS remained at
+   level 1). This means you are no longer tied to static link addresses,
+   you are free to move them around (in the higher half -1G to 0 range).
+   The loader will parse your ELF or PE kernel's symbols to find the
+   addresses where to map the data. The affected labels:
+     * mmio - virtual address of the MMIO area (on platforms that support
+       that),
+     * fb - virtual address of the linear framebuffer,
+     * bootboot - the main information structure,
+     * environment - the arguments to your kernel in an UTF-8 string
+     * your kernel's segment (using Elf_Phdr->p_vaddr and
+       PE_hdr->code_base).
+
+   There can be one loadable segment (concatenated code, data and bss
+   sections) in the kernel. With level 2, the size limit for this is
+   raised to 16M (in comparison, level 1 has a limit of 2M for info,
+   environment, code, data, bss and stack). That must be enough for
+   monolithic kernels too. You must be careful where you put these,
+   because these might overlap, and there's absolutely no checks (it is
+   perfectly valid for a kernel to request mapping of bootboot struct and
+   the environment string into the middle of its bss section for example.)
+   The repo contains a simple, dependency-free image creator tool, mkimg.
+   With that now you can create ESP FAT partition images and hybrid,
+   bootable GPT disk / CDROM / DVD images. This means all disk generation
+   related issues can be solved using this single file utility, no third
+   party tools needed. Just like the loaders, you are free to use this
+   tool with your project.
+   And last but not least, besides of the C example kernel, now there's a
+   Rust example kernel that you can use as a skeleton for your own kernel.
+   Cheers,
+   bzt
+   [61]Top
+    [62] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [63]Post Posted: Thu Jun 18, 2020 4:06 am
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi All,
+   I've replaced the quick and dirty mkimg tool with a proper, fully
+   featured mkbootimg. This one is also dependency-free, with precompiled
+   single portable executables available for Windows, MacOSX and Linux.
+   It receives a JSON configuration file as input, describing the disk you
+   wish to create, and then it saves the resulting disk image. For
+   example:
+   Code:
+   {
+       "diskguid": "00000000-0000-0000-0000-000000000000",
+       "disksize": 128,
+       "align": 1024,
+       "config": "initrd.dir/sys/config",
+       "initrd": { "type": "tar", "gzip": true, "directory": "initrd.dir"
+   },
+       "iso9660": 1,
+       "partitions": [
+           { "type": "boot", "size": 16 },
+           { "type": "ntfs", "size": 16, "name": "Win Exchange" },
+           { "type": "ext4", "size": 16, "name": "Linux Exchange" },
+           { "type": "00000000-0000-0000-0000-000000000000", "size": 32,
+   "name": "MyOS usr", "file": "usrpart.bin" },
+           { "type": "00000000-0000-0000-0000-000000000000", "size": 32,
+   "name": "MyOS var", "file": "varpart.bin" }
+       ]
+   }
+   Numbers are in Megabytes, except "align", which is in Kilobytes (align
+   0 means sector alignment for partitions). This tool can create:
+     * A compressed initrd image from a directory (currently supports cpio
+       and tar, but easily expandable)
+     * An ESP FAT boot partition with all the necessary files (including
+       the newly created initrd) and boot code in VBR
+     * An MBR / GPT / ISO9660 hybrid partitioning table with the boot
+       partition on it
+     * When creating a hybrid image, it also ensures that the root
+       directory, the fat table, and the clusters on the boot partition
+       are all 2048 bytes aligned
+     * Optionally it can add any number of partitions (well, up to 248),
+       and it can fill them up using partition images
+
+   As a bonus, it also checks your kernel for BOOTBOOT compatibility,
+   gives detailed error messages, and determines the architecture from
+   your executable. It has all the binaries included, so no additional
+   files needed (this includes the Raspberry Pi firmware files too, along
+   with the Broadcom licence).
+   Creating multiarch images also possible simply by using an array:
+   Code:
+       "initrd": { "type": "tar", "gzip": true, "directory": [
+   "initrd.x86", "initrd.arm" ] },
+   This will create two initrds, one for each arch and it will put both
+   loaders for them into the boot partition (the code is written in a way
+   that it can support many architectures, however there's only loader for
+   x86_64 and AArch64, so right now it is limited to two).
+   The generated images were tested with: FAT16/32: fsck.vfat, TianoCore
+   UEFI; GPT: fdisk and gdisk's verify function. All green. :-)
+   Hope this will be useful to some of you,
+   bzt
+   [64]Top
+    [65] Profile
+
+   Korona
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [66]Post Posted: Thu Jun 18, 2020 8:28 am
+
+                                   Offline
+                                    Member
+                                   Member
+
+   Joined: Thu May 17, 2007 1:27 pm
+   Posts: 999
+   bzt wrote:
+   There can be one loadable segment (concatenated code, data and bss
+   sections) in the kernel. With level 2, the size limit for this is
+   raised to 16M (in comparison, level 1 has a limit of 2M for info,
+   environment, code, data, bss and stack). That must be enough for
+   monolithic kernels too. You must be careful where you put these,
+   because these might overlap, and there's absolutely no checks (it is
+   perfectly valid for a kernel to request mapping of bootboot struct and
+   the environment string into the middle of its bss section for example.)
+   Out of curiosity, why did you pick this design? Do you map that segment
+   as RWX? That prevents many useful sanity checking ("security") features
+   from working, in particular WP and NX. WP and NX have probably caught
+   more memory bugs in Managarm than any other measures combined. (I mean
+   sure, one can enable it retroactively by some linker script hacks but
+   that seems unnecessarily clunky for something that just amounts to
+   setting two bits in the bootloader).
+   _________________
+   [67]managarm: Microkernel-based OS capable of running a Wayland desktop
+   (Discord: [68]https://discord.gg/7WB6Ur3). My OS-dev projects:
+   [[69]mlibc: Portable C library for managarm, qword, Linux, Sigma, ...]
+   [[70]LAI: AML interpreter] [[71]xbstrap: Build system for OS
+   distributions].
+   [72]Top
+    [73] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [74]Post Posted: Thu Jun 18, 2020 10:58 am
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi,
+   Thanks for checking out! The answer is in the documentation, but thanks
+   for asking!
+   Korona wrote:
+   Out of curiosity, why did you pick this design? Do you map that segment
+   as RWX?
+   It uses one boot segment for several reasons:
+   First, for simplicity. This is also required by many other boot loaders
+   (all that works with raw binaries this is actually a must otherwise you
+   can't convert ELFs to raws).
+   Second, BOOTBOOT can load PE32+ kernels too, and PE doesn't have the
+   concept of multiple, configurable load segments (there's a segment
+   pointer for code and one for the data in the header, that's it).
+   Third, there's no point in overcomplicating because your kernel will
+   switch away from identity mapping and will use its paging tables as
+   soon as possible anyway.
+   Fourth, BOOTBOOT is a multiplatform loader, and it provides the same
+   environment on all platforms. It cannot and should not rely on specific
+   CPU features, it tries to keep it as minimal as possible so that it can
+   support more platforms. (For example AArch64 has WNX bit too, but x86
+   doesn't. If BOOTBOOT were about to utilize that, then it couldn't load
+   kernels on x86; not with the same environment that is).
+   The Multiboot specification is way too x86 related, and you simply
+   can't use that on other platforms. As a result, loaders on other
+   platforms create their own, unofficial and incompatible versions of the
+   Multiboot spec ([75]read this for example). I put a lot of effort in
+   the BOOTBOOT Specification so this can never happen to it. As it
+   focuses on the smallest common denominator among platforms, its
+   environment should be available on all platforms, no need for tweaking
+   the spec.
+   Korona wrote:
+   That prevents many useful sanity checking ("security") features from
+   working, in particular WP and NX.
+   No, not really. My kernel, which is loaded using BOOTBOOT does take
+   advantage of both WP and NX security bits (and on AArch64 WNX too), so
+   it is doable just fine. From the BOOTBOOT User's Manual, in section
+   "Machine State":
+   Quote:
+   If a kernel wants to separate its code on a read-only segment and data
+   on a non-executable segment for security, it
+   can override the page translation tables as soon as it gains control.
+   BOOTBOOT Protocol does only
+   handle one loadable segment for simplicity (called boot in the example
+   linker script, see Appendix).
+   Providing memory security is not a job for a bootloader. Your kernel
+   will drop identity paging and will use its own tables anyway. It will
+   drop them for sure by the time it creates its first process. So why
+   should the loader complicate things? I like simple. This is similar to how
+   GDT works (with Grub too). It is set up because it's needed, but your
+   kernel should set a known GDT according to its needs as soon as possible
+   anyway.
+   In short, the bootloader's job is not to provide full functionality,
+   rather to provide the bare minimum to get the system going. For
+   example, BOOTBOOT allocates only 1k stack for your kernel per core,
+   because it doesn't want to tell you how you should organize your
+   memory. 1k is not enough for your kernel that's for sure, but it is
+   surely enough to set up paging and stacks the way your kernel wants
+   them, and that's about it.
+   Cheers,
+   bzt
+   [76]Top
+    [77] Profile
+
+   PeterX
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [78]Post Posted: Thu Jun 18, 2020 12:19 pm
+
+                                   Offline
+                                    Member
+                                   Member
+
+   Joined: Fri Nov 22, 2019 5:46 am
+   Posts: 590
+   Great job!
+   But why do you call it "micro-kernel" loader? It seems like
+   unnecessary modesty to me. You could load nearly any kernel, couldn't you?
+   And what platforms is it available for so far? If I understood it
+   correctly: Legacy-BIOS PCs, UEFI PCs, RaspPi 3 and 4. Correct?
+   Greetings
+   Peter
+   [79]Top
+    [80] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [81]Post Posted: Thu Jun 18, 2020 7:36 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   PeterX wrote:
+   Great job!
+   Thanks!
+   PeterX wrote:
+   But why do you call it "micro-kernel" loader? It seems like
+   unnecessary modesty to me. You could load nearly any kernel, couldn't you?
+   Well, yes, it can load statically linked images, so it can load
+   monolithic kernels too. I call it micro-kernel loader because it has a
+   neat feature that no other boot loader has: it loads an initrd into the
+   memory with a bunch of files, then it locates the kernel amongst them
+   (so the kernel is not a separate file as with GRUB, just a regular file
+   inside the initrd). This suits primarily the needs of a micro-kernel,
+   which requires to load several files on boot before it could access the
+   disks.
+   PeterX wrote:
+   And what platforms is it available for so far? If I understood it
+   correctly: Legacy-BIOS PCs, UEFI PCs, RaspPi 3 and 4. Correct?
+   Yes, for disks, USB storages and SD cards. Plus it can also boot from a
+   CDROM via El Torito (both legacy BIOS and UEFI), and from ROM (via BIOS
+   Boot Specification and under UEFI as Option ROM, this is useful for
+   embedded systems); you can load it as a Linux kernel (using the
+   Linux/x86 boot protocol, e.g. qemu -kernel), and last but not least,
+   via GRUB, because it supports Multiboot too. Check out
+   [82]https://gitlab.com/bztsrc/bootboot/tree/master/images, along with
+   the example images there's a Makefile with lots of qemu commands, one
+   for each boot option.
+   In the future if I could get decent documentation and my hands on a
+   board to test with, then I would like to support more. BeagleBoard is
+   definitely on the bucket list, for example. I was also thinking about
+   UltraSparc64, but as its market is shrinking rapidly I don't think it's
+   worth the effort any more. Making it work with Macs is also a
+   possibility (so far I had only one roadblock, I don't know how to get
+   the frame buffer address using UGA, but that's the only one thing.
+   Everything else works).
+   Greetings
+   Peter
+   [83]Top
+    [84] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [85]Post Posted: Fri Jun 19, 2020 9:41 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi All,
+   Just a little heads-up. I've upgraded the image creator:
+     * It's multilingual now and autodetects OS' language
+     * If initrd is given as an image file which is compressed, now that's
+       transparently uncompressed to look up the kernel inside
+     * New initrd formats have been added, it supports hpodc cpio, POSIX
+       ustar, James Molloy's initrd and FS/Z (in lack of an up-to-date
+       specification and with no available image creator I had to abandon
+       SFS. But if you somehow manage to create an image, you can include
+       it with the "file" directive and you should still be able to boot
+       from it.)
+     * Many JSON tags were made optional; the tool now calculates as many
+       properties as it can. For example on a partition it's enough to
+       specify "type" and "file".
+     * You can also use the "directory" directive for partitions, meaning
+       you can create partition images from directories on-the-fly.
+       Currently supported: tar and FS/Z (easy to add new file system
+       drivers, see documentation). Could have added cpio and jamesm too,
+       but those play not well with 512 byte sectors.
+     * I've updated the FS API to inform it if the image is generated for
+       initrd or partition (this is needed for FS/Z, because it supports
+       gaps in files, which must be avoided in initrds)
+     * On platforms that support it (that's basically all save Windows)
+       symlinks are also parsed and generated into initrds and partition
+       images. This depends on S_ISLNK macro and readlink() libc function.
+
+   Cheers,
+   bzt
+   [86]Top
+    [87] Profile
+
+   heemogoblin
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [88]Post Posted: Thu Jul 09, 2020 7:57 am
+
+                                   Offline
+
+   Joined: Sat Jun 27, 2020 8:00 am
+   Posts: 13
+   Dear All,
+   I am somewhat new to OS development and programming but am quite
+   competent with the theory and also the implementation of an OS. Since I
+   want my OS to make use of modern features I have decided to build it
+   for UEFI firmware and x86_64 architecture. Despite being able to write
+   a UEFI bootloader, I could not find any way to compile it and get it to
+   call my kernel_main. Then I decided to use a premade bootloader, and
+   have settled on your BOOTBOOT as it directly takes me to 64 bit mode
+   and works with UEFI. I have looked at the example kernel and have
+   compiled it to get myself an elf kernel.
+   Now here is the problem: I don't know how to boot it in virtualbox. To
+   be more specific here are some details:
+   - I have my kernel elf (the0s.x86_64.elf) in my Documents/OS/The \OS/
+   folder.
+   - I have downloaded all of the bootboot code and have it ready.
+   - I am interested in using the bootboot.efi loader for UEFI systems.
+   - I have tried straight-up creating a .img with disk utility and
+   partitioning it but that has problems:
+   - Whenever I make one, I can only edit one of its partitions.
+   - I can only place the bootboot efi code to the system partition and
+   unfortunately the other partition for my kernel is blocked. I have
+   tried many workarounds, none work.
+   - .imgs to virtualbox are floppy controllers and it is 2020.
+   - I compiled the mkbootimg and have checked my kernel. It is OK.
+   However I have no clue how to use that to create a .iso with uefi
+   compatibility and my kernel .elf at Documents/OS/The \OS/.
+   - I am quite confused about the config file. It says that the
+   kernel=sys/config - does that mean the kernel is at sys/config/<name>
+   or is the kernel called config and is at sys/ directory? Also it is in
+   a specially named partition?
+   I would ideally like please some instructions to create a .iso ideally
+   or .vmdk with UEFI booting and my kernel on it so I can test out my OS.
+   Any way will do, as long as it can run on ubuntu and will give me a
+   vmdk or .iso virtualbox will boot.
+   Also could you tell me if I can pass an EFI_SystemTable or a pointer to
+   one to my kernel entry point? I'd prefer to get my time via the Runtime
+   services GetTime function.
+   Apologies for being so naive, but I shall be incredibly grateful if you
+   could help me. Thanks in advance!
+   [89]Top
+    [90] Profile
+
+   Octocontrabass
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [91]Post Posted: Sat Jul 11, 2020 10:14 pm
+
+                                   Offline
+                                    Member
+                                   Member
+
+   Joined: Mon Mar 25, 2013 7:01 pm
+   Posts: 4978
+   You can use VBoxManage to convert raw disk images to a format
+   VirtualBox supports. [92]Here is the documentation.
+   I don't see why you're not able to access more than one partition on
+   your disk image, though.
+   [93]Top
+    [94] Profile
+
+   bzt
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [95]Post Posted: Sun Jul 12, 2020 5:17 am
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Thu Oct 13, 2016 4:55 pm
+   Posts: 1584
+   Hi,
+   Yes, @Octocontrabass is right, you can access more partitions.
+   heemogoblin wrote:
+   Dear All,
+   I am somewhat new to OS development and programming but am quite
+   competent with the theory and also the implementation of an OS. Since I
+   want my OS to make use of modern features I have decided to build it
+   for UEFI firmware and x86_64 architecture. Despite being able to write
+   a UEFI bootloader, I could not find any way to compile it and get it to
+   call my kernel_main. Then I decided to use a premade bootloader, and
+   have settled on your BOOTBOOT as it directly takes me to 64 bit mode
+   and works with UEFI. I have looked at the example kernel and have
+   compiled it to get myself an elf kernel.
+   Now here is the problem: I don't know how to boot it in virtualbox. To
+   be more specific here are some details:
+   You create an image, then you configure your VB machine to boot with
+   UEFI: "Details" > "System" > "Motherboard" click "Enable EFI (special
+   OSes only)".
+   I'd recommend to use mkbootimg, that's what it is for. But if you want
+   to make it by hand, here are my answers.
+   heemogoblin wrote:
+   - I have my kernel elf (the0s.x86_64.elf) in my Documents/OS/The \OS/
+   folder.
+   Move it into an initrd folder. Your kernel will be loaded as part of
+   the initrd along with other files you want/need on boot.
+   heemogoblin wrote:
+   - I have downloaded all of the bootboot code and have it ready.
+   Good.
+   heemogoblin wrote:
+   - I am interested in using the bootboot.efi loader for UEFI systems.
+   For that, you'll need the following files:
+   EFI/BOOT/BOOTX64.EFI - move bootboot.efi here
+   BOOTBOOT/INITRD - the initrd image with your kernel in it, can be a tar
+   or cpio archive, optionally gzip compressed.
+   BOOTBOOT/CONFIG - optional, a configuration that will be passed to your
+   kernel. The loader uses only two keys if given, "screen=WxH" and
+   "kernel=elf_filename_inside_the_initrd" all the others are for your
+   kernel.
+   heemogoblin wrote:
+   - I have tried straight-up creating a .img with disk utility and
+   partitioning it but that has problems:
+   - Whenever I make one, I can only edit one of its partitions.
+   - I can only place the bootboot efi code to the system partition and
+   unfortunately the other partition for my kernel is blocked. I have
+   tried many workarounds, none work.
+   Not sure what disk utility you're using, but you should create a GPT
+   disk, with a partition that is FAT formatted and copy the three files
+   above to that system partition. Change the type to EFI System Partition
+   (this is important, otherwise EFI won't recognize it). The BOOTBOOT
+   User's Manual Appendix has an example how to use fdisk and mkfs.vfat
+   for this.
+   heemogoblin wrote:
+   - .imgs to virtualbox are floppy controllers and it is 2020.
+   I don't know where you get that, .img files are just images. For
+   example Raspberry OS contains an SD card image. My mkbootimg tool
+   creates a hybrid CDROM / disk image by that extension.
+   heemogoblin wrote:
+   - I compiled the mkbootimg and have checked my kernel. It is OK.
+   However I have no clue how to use that to create a .iso with uefi
+   compatibility and my kernel .elf at Documents/OS/The \OS/.
+   If you have compiled mkbootimg, then you don't need the boot loader
+   files, as it has them already (data.c). It is well documented, and
+   example configurations are provided. Basically you give it two
+   arguments:
+   Code:
+   ./mkbootimg config.json output.img
+   The configuration is described in detail in the README (there's an
+   example json in it), there's an example json in the source directory,
+   and in the [96]images directory you can find the actual json I've used
+   to create the example disk images, as well as a Makefile on how to
+   invoke it. If you want it to generate a hybrid ISO9660 image, that can
+   be used on CDROMs, then in the mkbootimg json configuration, use
+   "iso9660=true". Here's my mkbootimg.json:
+   Code:
+   {
+       "disksize": 128,
+       "config": "./config",
+       "initrd": { "type": "cpio", "gzip": true, "directory": "initrd" },
+       "iso9660": true,
+       "partitions": [
+           { "type": "boot", "size": 16 }
+       ]
+   }
+   This instructs mkbootimg to load the BOOTBOOT configuration from the
+   file "./config", create an initrd in gzipped cpio format from the
+   contents of the directory "initrd" (copy the0s.x86_64.elf there). Make
+   the image 128M in size and CDROM compatible, and generate one ESP
+   partition only (if you need it to generate more GPT records, just add
+   more elements to the "partitions" array, and you can specify a
+   partition image too with "file").
+   There's no need to create a filesystem on the boot partition, nor to
+   copy files there, the mkbootimg will take care of all of that for you.
+   heemogoblin wrote:
+   - I am quite confused about the config file. It says that the
+   kernel=sys/config - does that mean the kernel is at sys/config/<name>
+   or is the kernel called config and is at sys/ directory?
+   I don't know where you get "kernel=sys/config". The "kernel" is the
+   name of your kernel inside the initrd image (under the specified initrd
+   directory). If you use mkbootimg, you'll have two config files:
+     * the json - describes the disk image you want to create
+     * BOOTBOOT config - this will be copied to the ESP partition, and it
+       is going to be parsed by the loader and your kernel during boot
+
+   In the latter you can specify the name of your kernel inside the initrd
+   archive with "kernel=the0s.x86_64.elf", but that's not necessary if
+   your elf kernel is the first executable in the archive.
+   heemogoblin wrote:
+   Also it is in a specially named partition?
+   Yes, the ESP is given a fixed name, "EFI System Partition".
+   heemogoblin wrote:
+   I would ideally like please some instructions to create a .iso ideally
+   or .vmdk with UEFI booting and my kernel on it so I can test out my OS.
+   Any way will do, as long as it can run on ubuntu and will give me a
+   vmdk or .iso virtualbox will boot.
+   The mkbootimg generates a disk image file. With "iso9660=true" it is
+   going to be a hybrid image, which you can simply rename to ".iso" and
+   it will work. For vmdk I'm not sure, I use vdi for VirtualBox. For
+   that, use the following command:
+   Code:
+   VBoxManage convertfromraw yourimage.img yourimage.vdi
+   Because this will generate a random UUID for the vdi, and VirtualBox is
+   extremely picky about UUIDs matching its configuration, I'd also
+   recommend to use the following:
+   Code:
+   VBoxManage internalcommands sethduuid yourimage.vdi (some fixed UUID)
+   heemogoblin wrote:
+   Also could you tell me if I can pass an EFI_SystemTable or a pointer to
+   one to my kernel entry point? I'd prefer to get my time via the Runtime
+   services GetTime function.
+   Again, described in great detail in the BOOTBOOT User's Manual. The
+   protocol maps an informational structure for your kernel, which
+   contains both the EFI_SystemTable pointer and the time (which is
+   queried by the EFI GetTime service under UEFI).
+   heemogoblin wrote:
+   Apologies for being so naive, but I shall be incredibly grateful if you
+   could help me. Thanks in advance!
+   You should read the documentation first :-) Both the READMEs and pdf
+   have all the answers for your questions. But sure, ask and I'll try to
+   answer.
+   Cheers,
+   bzt
+   [97]Top
+    [98] Profile
+
+   zaval
+    Post subject: Re: BOOTBOOT Multi-platform Micro-kernel Loader
+   [99]Post Posted: Sun Jul 12, 2020 4:03 pm
+
+                                   Offline
+                                    Member
+                                   Member
+                                 User avatar
+
+   Joined: Fri Feb 17, 2017 4:01 pm
+   Posts: 633
+   Location: Ukraine, Bachmut
+   Quote:
+   Despite being able to write a UEFI bootloader, I could not find any way
+   to compile it and get it to call my kernel_main. Then I decided to use
+   a premade bootloader, and have settled on your BOOTBOOT as it directly
+   takes me to 64 bit mode and works with UEFI. I have looked at the
+   example kernel and have compiled it to get myself an elf kernel.
+   does your religion allow you to use MSVC? if so, that's so easy to
+   compile your OS Loader with it as compiling "hello world". if it
+   doesn't, then *shrugs.
+   just for the note, despite bzt scares you, that you are required to
+   mark a FAT partition as ESP and use only GPT, it's not true. you can
+   create MBR with an ordinary FAT partition and be sure - it will be
+   recognized by UEFI. since you are only at the start, I'd go this way
+   (it's easier and maybe you'd have less troubles), actually, I do
+   exactly this way - I created a small .vhd with VBox - thanks god its
+   vhds are liked by diskpart (unlike qemu's). and created there a FAT
+   partition. UEFI recognizes the disk, FAT formatted volume, and, of
+   course, lets me start my OSL. either from the shell or from the Boot
+   Manager menu or, finally, by creating a Load Option with the latter, -
+   this way. Just don't forget to follow the guidelines and put your OSL
+   in \efi\<YourNameAsAnOsVendor> directory, each OSL in its own directory
+   (for the same architecture, I don't know why, but UEFI whimsically
+   demands this, despite it distinguishes images well by their paths,
+   which, of course, are different for two OSLs even lying in the same
+   directory).
+   Of course, when you create such a disk, vhd in my case, for an easy
+   access with diskpart, you can create 2 partitions or more. and then
+   your OSL may access your OS boot volume (where your OS resides) either
+   by using UEFI simple file system protocol and friends, if this is a FAT
+   volume (because, see, bzt, FAT is a required part by the standard, for
+   easening interoperability and osdevers lives :D and not for what you
+   fantasize) or using block I/O protocols given to you by UEFI for
+   reading disk sectors and then making FS related parsing in the OSL by
+   your own.
+   and at the end, again and again - on this stubborn placement in
+   \efi\boot\bootx64.efi. this is not a normal way of putting one's OSL.
+   it's just a last resort in an attempt to start something by the Boot
+   Manager on non-removable storages. if there is something else on the
+   same disk, this thing won't be even touched. I have no idea why people
+   don't look farther than this resort point. it is only somewhat and
+   questionably good for removable storages, when you are going to install
+   something and flash this storage for a one shot. even then, it's not
+   necessary, and could mess up with something else, also blindly
+   demanding itself to be placed there (and then when you put that other
+   thing there, it overwrites the previous one and you find yourself in a
+   mess). every UEFI has a "load from file" option - you better place your
+   stuff in a personalized place and direct Boot Manager to it. it's more
+   to bzt with his bootboot. the first time user directs the Boot Manager
+   to load your bootboot, then, at the start, bootboot checks how it was
+   started (by analyzing BootCurrent variable) and if it's not from its
+   own Load Option, which is the case for the first start, asks a user if
+   they want to install bootboot to this machine. on "yes", you just
+   create a Load Option for bootboot by using SetVariable(), and that's
+   it! this is how it should have been done.
+   _________________
+   [100]ANT - NT-like OS for x64 and arm64.
+   [101]efify - UEFI for a couple of boards (mips and arm). suspended due
+   to lost of all the target park boards (russians destroyed our town).
+   [102]Top
+    [103] Profile
+
+        Display posts from previous: [All posts] Sort by [Post time]
+                              [Ascending_]  Go
+   [104]Post new topic  [105] Reply to topic  Page 1 of 3
+    [ 39 posts ]  [106]Go to page 1, [107]2, [108]3  [109]Next
+
+   [110]Board index » [111]Operating System Development »
+   [112]Announcements, Test Requests, & Job openings
+
+   All times are UTC - 6 hours
+
+Who is online
+
+   Users browsing this forum: No registered users and 1 guest
+   You cannot post new topics in this forum
+   You cannot reply to topics in this forum
+   You cannot edit your posts in this forum
+   You cannot delete your posts in this forum
+   You cannot post attachments in this forum
+   Search for: ____________________ Go
+   Jump to: [   Announcements, Test Requests, & Job openings]  Go
+
+   Powered by [113]phpBB © 2000, 2002, 2005, 2007 phpBB Group
+
+References
+
+   Visible links:
+   1. https://forum.osdev.org/feed.php
+   2. https://forum.osdev.org/feed.php?mode=news
+   3. https://forum.osdev.org/feed.php?mode=forums
+   4. https://forum.osdev.org/feed.php?mode=topics
+   5. https://forum.osdev.org/feed.php?mode=topics_active
+   6. https://forum.osdev.org/feed.php?f=2
+   7. https://forum.osdev.org/feed.php?f=2&t=33362
+   8. https://forum.osdev.org/./ucp.php?mode=login&sid=3552323bd691ba1e0995756313969b5b
+   9. https://forum.osdev.org/./ucp.php?mode=register&sid=3552323bd691ba1e0995756313969b5b
+  10. http://wiki.osdev.org/
+  11. https://forum.osdev.org/./faq.php?sid=3552323bd691ba1e0995756313969b5b
+  12. https://forum.osdev.org/./search.php?sid=3552323bd691ba1e0995756313969b5b
+  13. https://forum.osdev.org/./search.php?search_id=unanswered&sid=3552323bd691ba1e0995756313969b5b
+  14. https://forum.osdev.org/./search.php?search_id=active_topics&sid=3552323bd691ba1e0995756313969b5b
+  15. https://forum.osdev.org/./index.php?sid=3552323bd691ba1e0995756313969b5b
+  16. https://forum.osdev.org/./viewforum.php?f=16&sid=3552323bd691ba1e0995756313969b5b
+  17. https://forum.osdev.org/./viewforum.php?f=2&sid=3552323bd691ba1e0995756313969b5b
+  18. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&start=0&sid=3552323bd691ba1e0995756313969b5b
+  19. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=1950&sid=3552323bd691ba1e0995756313969b5b
+  20. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=3731&sid=3552323bd691ba1e0995756313969b5b
+  21. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=155&sid=3552323bd691ba1e0995756313969b5b
+  22. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=1902&sid=3552323bd691ba1e0995756313969b5b
+  23. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=2477&sid=3552323bd691ba1e0995756313969b5b
+  24. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=67&sid=3552323bd691ba1e0995756313969b5b
+  25. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=14234&sid=3552323bd691ba1e0995756313969b5b
+  26. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=11616&sid=3552323bd691ba1e0995756313969b5b
+  27. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=12891&sid=3552323bd691ba1e0995756313969b5b
+  28. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=117&sid=3552323bd691ba1e0995756313969b5b
+  29. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=4287&sid=3552323bd691ba1e0995756313969b5b
+  30. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=2&sid=3552323bd691ba1e0995756313969b5b
+  31. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=1906&sid=3552323bd691ba1e0995756313969b5b
+  32. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=4866&sid=3552323bd691ba1e0995756313969b5b
+  33. https://forum.osdev.org/./posting.php?mode=post&f=2&sid=3552323bd691ba1e0995756313969b5b
+  34. https://forum.osdev.org/./posting.php?mode=reply&f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b
+  35. https://forum.osdev.org/viewtopic.php?f=2&t=33362
+  36. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=15
+  37. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=30
+  38. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=15
+  39. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&start=0&sid=3552323bd691ba1e0995756313969b5b&view=print
+  40. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&view=previous&sid=3552323bd691ba1e0995756313969b5b
+  41. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&view=next&sid=3552323bd691ba1e0995756313969b5b
+  42. https://forum.osdev.org/./viewtopic.php?p=287331&sid=3552323bd691ba1e0995756313969b5b#p287331
+  43. https://gitlab.com/bztsrc/bootboot
+  44. https://gitlab.com/bztsrc/bootboot/blob/master/mykernel/link.ld
+  45. https://gitlab.com/bztsrc/bootboot/blob/master/bootboot.h
+  46. https://gitlab.com/bztsrc/bootboot/tree/master/mykernel
+  47. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  48. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  49. https://forum.osdev.org/./viewtopic.php?p=287447&sid=3552323bd691ba1e0995756313969b5b#p287447
+  50. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  51. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=18898&sid=3552323bd691ba1e0995756313969b5b
+  52. https://forum.osdev.org/./viewtopic.php?p=288396&sid=3552323bd691ba1e0995756313969b5b#p288396
+  53. https://wiki.osdev.org/BOOTBOOT
+  54. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  55. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  56. https://forum.osdev.org/./viewtopic.php?p=305109&sid=3552323bd691ba1e0995756313969b5b#p305109
+  57. https://gitlab.com/bztsrc/bootboot/-/tree/master/images
+  58. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  59. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  60. https://forum.osdev.org/./viewtopic.php?p=306460&sid=3552323bd691ba1e0995756313969b5b#p306460
+  61. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  62. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  63. https://forum.osdev.org/./viewtopic.php?p=306501&sid=3552323bd691ba1e0995756313969b5b#p306501
+  64. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  65. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  66. https://forum.osdev.org/./viewtopic.php?p=306504&sid=3552323bd691ba1e0995756313969b5b#p306504
+  67. https://github.com/managarm/managarm
+  68. https://discord.gg/7WB6Ur3
+  69. https://github.com/managarm/mlibc
+  70. https://github.com/qword-os/lai
+  71. https://github.com/managarm/xbstrap
+  72. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  73. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=3647&sid=3552323bd691ba1e0995756313969b5b
+  74. https://forum.osdev.org/./viewtopic.php?p=306507&sid=3552323bd691ba1e0995756313969b5b#p306507
+  75. https://github.com/jncronin/rpi-boot/blob/master/MULTIBOOT-ARM
+  76. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  77. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  78. https://forum.osdev.org/./viewtopic.php?p=306511&sid=3552323bd691ba1e0995756313969b5b#p306511
+  79. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  80. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=25322&sid=3552323bd691ba1e0995756313969b5b
+  81. https://forum.osdev.org/./viewtopic.php?p=306521&sid=3552323bd691ba1e0995756313969b5b#p306521
+  82. https://gitlab.com/bztsrc/bootboot/tree/master/images
+  83. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  84. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  85. https://forum.osdev.org/./viewtopic.php?p=306532&sid=3552323bd691ba1e0995756313969b5b#p306532
+  86. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  87. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  88. https://forum.osdev.org/./viewtopic.php?p=307287&sid=3552323bd691ba1e0995756313969b5b#p307287
+  89. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  90. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=25855&sid=3552323bd691ba1e0995756313969b5b
+  91. https://forum.osdev.org/./viewtopic.php?p=307379&sid=3552323bd691ba1e0995756313969b5b#p307379
+  92. https://www.virtualbox.org/manual/ch08.html#vboxmanage-convertfromraw
+  93. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  94. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=14234&sid=3552323bd691ba1e0995756313969b5b
+  95. https://forum.osdev.org/./viewtopic.php?p=307390&sid=3552323bd691ba1e0995756313969b5b#p307390
+  96. https://gitlab.com/bztsrc/bootboot/tree/master/images
+  97. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+  98. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17273&sid=3552323bd691ba1e0995756313969b5b
+  99. https://forum.osdev.org/./viewtopic.php?p=307402&sid=3552323bd691ba1e0995756313969b5b#p307402
+ 100. https://ant-upptech.sourceforge.io/?subject=osdev
+ 101. https://efify.sourceforge.io/
+ 102. https://forum.osdev.org/viewtopic.php?f=2&t=33362#wrapheader
+ 103. https://forum.osdev.org/./memberlist.php?mode=viewprofile&u=17546&sid=3552323bd691ba1e0995756313969b5b
+ 104. https://forum.osdev.org/./posting.php?mode=post&f=2&sid=3552323bd691ba1e0995756313969b5b
+ 105. https://forum.osdev.org/./posting.php?mode=reply&f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b
+ 106. https://forum.osdev.org/viewtopic.php?f=2&t=33362
+ 107. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=15
+ 108. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=30
+ 109. https://forum.osdev.org/./viewtopic.php?f=2&t=33362&sid=3552323bd691ba1e0995756313969b5b&start=15
+ 110. https://forum.osdev.org/./index.php?sid=3552323bd691ba1e0995756313969b5b
+ 111. https://forum.osdev.org/./viewforum.php?f=16&sid=3552323bd691ba1e0995756313969b5b
+ 112. https://forum.osdev.org/./viewforum.php?f=2&sid=3552323bd691ba1e0995756313969b5b
+ 113. http://www.phpbb.com/
+
+   Hidden links:
+ 115. https://forum.osdev.org/./index.php?sid=3552323bd691ba1e0995756313969b5b
diff --git a/doc/klacansky.com_notes_useless-operating-system.txt b/doc/klacansky.com_notes_useless-operating-system.txt
new file mode 100644
index 0000000..cc15e24
--- /dev/null
+++ b/doc/klacansky.com_notes_useless-operating-system.txt
@@ -0,0 +1,978 @@
+                            Useless Operating System
+
+   [1]Code
+
+   In my final project for the advanced operating system class taught by
+   John Regehr, I implemented parts of an operating system inspired by
+   exo/micro kernel design.
+
+   The overall goal is to expose hardware as much as possible while
+   providing protection.
+
+   I decided to stick with amd64 architecture as porting to aarch64 seems
+   unfeasible given the time frame and I primarily use amd64 CPUs so I
+   want to know more about them. UOS does not do identity mapping of
+   physical memory outside boot thus kernel will be using virtual memory
+   in almost same way as user mode programs.
+
+   After implementing small OS I recommend it to all low-level or high
+   performance oriented programmers. More importantly, OS development is
+   joy, especially the design of interfaces and low-level tinkering.
+   Unfortunately, developing a usable operating system is no small task
+   (all those drivers) and user mode applications severely constrain the
+   kernel design. I therefore moved to compilers and programming
+   languages, which are tractable for a single person (in the days of
+   LLVM), and can significantly improve ones life by making programming
+   fun again.
+
+   Also, the development made me finally appreciate debugger, where
+   simpler printf can't be used as registers need to be inspected before
+   and after instruction execution, e.g., before and after sysret.
+
+Important
+
+     * disable red zone with compiler flag -mno-red-zone (can get
+       corrupted in nested interrupts)
+     * make sure pointers are in canonical form
+
+Motivation
+
+   In the class we used xv6, a clean Unix clone with approachable code.
+   However, adding VESA BIOS Extensions framebuffer required new system
+   calls and kernel changes which was clunky. I especially disliked that
+   any experiment required tinkering with the kernel, and provided
+   abstractions were quite high level.
+
+   Moreover, the xv6 operating system runs on 32-bit architecture, but
+   most modern CPUs are all 64-bit. Furthermore, the Amd64 architecture is
+   much simpler as it removes tons of old cruft such as segmentation and
+   has simple system calls.
+
+   The primary inspiration for UOS are MIT's JOS (micro/exo kernel) and a
+   bit L4 (microkernel). For example, almost all exceptions are handled by
+   user and not kernel, including page fault. The timer interrupt is
+   forwarded to user, but in case of buggy/malicious container, kernel
+   will wait few slots and then kill the misbehaving container. Since the
+   user knows more about its use of registers, it can save only the
+   necessary ones, e.g., may not save floating point registers. On context
+   switch, kernel stores only instruction and stack pointers.
+
+   I decided to go with capability-based permission system, where each
+   resource is associated with a "lock" and the key to the lock consists
+   of hashed permissions and the lock itself. Kernel/user can easily
+   verify the key is valid by hashing the lock and permissions of the key and
+   checking if the key's hash matches. This approach makes the kernel
+   simpler, as memory that can't be exposed to user (e.g., < 1 MB) is
+   simply associated with a lock that only kernel knows. Therefore, no
+   special range checks are required in the kernel when user maps memory.
+   Shared memory is also simple to implement, as a container can easily
+   create keys for the memory it owns and give it to a newly created
+   container. Also, kernel uses same interface as user, i.e., it presents
+   key if it needs to map memory. TODO: how to safely store key if
+   Meltdown is present?
+
+   UOS in contrast to Linux does not have a text mode. All assembly is in
+   separate files as I am convinced inline assembly is not good solution.
+   Moreover, kernel consists of only single .c file, which simplifies the
+   build system to a single compile command. It targets desktops and uses
+   2 MiB pages.
+
+Amd64 Architecture
+
+   Most desktops or laptops contain amd64 processor either from Intel or
+   Amd. Since UOS targets personal computers it supports only Amd64
+   architecture. This choice simplifies the kernel as 32bit architectures
+   have much more legacy cruft. For example, Amd64 has no segmentation,
+   good syscall interface, simpler instruction set and only one necessary
+   task segment. The arguments for C ABI are passed in registers. UOS does
+   not allow kernel to read any user memory without explicit mapping.
+   Moreover, since there are more registers, all syscall arguments must use
+   them.
+
+   Any virtual address must be in canonical form, i.e., if bit 47
+   (counting from 0) is set, then all more significant bits must be also
+   set. This quirk divides the address space into 2 pieces with huge hole
+   in between them. Pretty annoying as UOS is not higher half kernel.
+
+   The System V ABI requires first 6 integer or pointer arguments to be
+   passed in registers in order rdi, rsi, rdx, rcx, r8, and r9. The
+   remaining arguments (7 and more) are passed on stack.
+
+Overview of Startup
+
+   At boot, Basic Input/Output System (BIOS) loads specific sector of 512
+   bytes into memory at fixed address and starts execution. Usually, this
+   program is a bootloader that loads the kernel, or in our case second
+   stage of the bootloader as 64 bit assembly takes more space and I could
+   not fit the ELF loader into a single sector. The second stage parses
+   ELF file (kernel) on disk and loads it into memory. The kernel
+   bootstraps itself by setting up physical and virtual memory managers
+   and core struct for the boot processor. It then switches to new page
+   table and stack and continues by starting other processors and loading
+   user init program into a container. We will now discuss individual
+   stages in more detail.
+
+Bootloader
+
+   We focus on using BIOS instead of UEFI due to simplicity of the setup
+   as there is no need for additional tools to build BIOS bootable binary.
+   The disadvantage of BIOS is all the legacy cruft, such as starting in
+   Real Mode and having only 512 bytes for bootloader.
+
+   The purpose of bootloader is to do initial setup (such as video mode,
+   virtual memory) and load kernel from a disk into a memory. Since we
+   could not squeeze bootloader with ELF loader into 512 bytes like xv6,
+   we split the bootloader into two stages, where the first stage's
+   purpose is to only load the second stage. For now, we use 4 KiB of
+   space for both stages and kernel starts at sector 8.
+
+   After the video mode and long mode setup, the bootloader reads kernel
+   from ELF binary into predefined location and jumps into the kernel
+   entry point. IDE driver is used to perform reading from a disk. UOS
+   does not support multiboot standard as xv6, so it will work only with
+   our bootloader.
+
+   The first stage must be 512 bytes and the assembly needs to be padded
+   to fill 512 bytes (and adding MBR bytes at 510 offset).
+
+  A20 Line
+
+   Used to ignore 21st bit of address to emulate wraparound from older
+   architectures. We have to enable this 21st bit so we can address all
+   memory.
+
+   xv6 bootloader uses keyboard controller to enable this bit. We opt for
+   BIOS and its functions.
+movw $0x2401, %ax
+int $0x15
+
+   http://www.win.tue.nl/~aeb/linux/kbd/A20.html
+
+  Global Descriptor Table
+
+   entry point of bootblock.o must be at the address 0x7C00 which can be
+   double checked by doing objdump -d bootblock.o
+
+   The simplest "debugging" can be done by writing to screen; segmentation
+   must be used as CGA address of 0xB8000 is outside 16-bit range. This
+   addressing can be done by setting %ds to 0xB800 (segment is multiplied
+   by 16 on access).
+
+   We directly transition from real mode to long mode without going
+   through protected mode. Since the boot sector is small (512 bytes) we
+   have to manually set up page tables instead of loading them from an
+   array.
+
+   We can't use .data segment in bootloader C code (for example to store
+   static string) as only .text is copied with objcopy (-j flag; TODO).
+
+  Bootstrap Page Table
+
+   We map same physical address [0, 1 GiB) to the virtual addresses [0x0,
+   1 GiB), and [0x8000000000, 0x8000000000 + 1 GiB). This is necessary as
+   we linked kernel to reside at address 0x8000100000 (as we want it to be
+   mapped into each process space) and when we jump to it it must be
+   mapped properly. The first 1 GiB in virtual address space are mapped
+   because it is the space in which bootloader lives and from where we
+   will perform the jump.
+
+   The reason for choosing address where all 39 bits are zero is so we can
+   have only top level page directory (PML4) which shares mapping with the
+   [0, 1 GiB) entries. This sharing is simply done by pointing PML4 entry
+   to the same entry in PDP (next level page table) as for the first
+   entry.
+
+   direct switch to long mode
+
+  Real Hardware
+
+   UOS does not boot on my desktop as it would need SATA driver. If
+   booting from USB, it turns out if the disk lacks partition table the
+   BIOS will set DL register to 0x0, i.e., floppy, and floppy does not
+   support logical block addressing (LBA) which we use in IDE driver.
+
+ELF
+
+   Fixed-length ELF header and variable-length program header listing
+     * .text program instructions
+     * .rodata read-only data (we should make it only read-only memory)
+     * .data program's data (e.g., global variables)
+     * .bss is a reserved section for uninitialized global variables (we
+       need to zero them)
+     * VMA is link address LMA is load address of .text section (as the
+       offsets in code are absolute due to not using Position Independent
+       Code)
+
+   For some stupid reason .rodata is together with .text in same segment,
+   making .rodata executable. (http://thiébaud.fr/rodata.html)
+
+  Symbol Table
+
+   Symbol table associates names with linear addresses and thus can be
+   used by the loader to find addresses of exception and interrupt
+   handlers (those must have some agreed names).
+
+Drivers
+
+  UART
+
+   Serial port for debugging. Uses port-mapped I/O which ignores virtual
+   memory (thus even if we do not map first 2 MiB it works).
+
+  IDE driver
+
+   [2]Reference
+
+   We need to read kernel from the primary ATA disk in bootloader to
+   initialize the core OS.
+
+   We wrote the function for reading sectors in assembly to avoid inline
+   assembly. Also, we could wrap individual instructions in assembly
+   functions but it was much simpler this way.
+
+Kernel Bootstrap
+
+   Mostly disabling legacy stuff and enabling modern features as the
+   processor assumes it woke up in last century.
+
+Memory Management
+
+     * frame allocator (2 MiB)
+     * avoid mapping physical memory (higher half)
+     * tried recursive paging (pain to modify other processes, TLB
+       pressure)
+     * ended up with linear/physical dual mapped page tables
+     * kernel pages marked as global
+     * kernel heap (used for most allocations)
+
+   There are roughly three levels of memory management:
+     * physical memory
+     * virtual memory
+     * heap
+
+   We use recursive page table mapping to avoid the need to remap page
+   tables each time we need to access them or to use contiguous memory
+   storage for the VA to PA and PA to VA conversions which are necessary
+   for updating the page table. The recursive mapping is the simplest as
+   we need these conversions only for page table manipulation. In other
+   cases such as memory mapped IO we would use a simple mapping.
+
+   Hmm, recursive mapping allows us to access entries of page table only
+   for currently loaded page table which is really annoying.
+
+   There are several approaches:
+     * remap physical memory to kernel and use simple +-KERNEL_BASE
+     * use recursive page table (may use it for user space)
+     * keep small space available and map the page table before modifying
+       it
+     * store virtual and physical addresses for page table (have to keep
+       them in sync, allows for custom format which gets rid of canonical
+       address issues NX bit, problems with dirty and accessed flags)
+
+  Recursive Page Table
+
+   By mapping pml4 entry to pml4 we can access all paging structures for
+   current process. The main advantage is the simplicity and constant
+   virtual address space used in kernel (since only pml4 is mapped in
+   kernel space). If we need to modify another process's page table we can
+   point it to a pml4 slot and use the recursive trick on pdp (which is
+   pml4 of the other process).
+
+   Unfortunately, there are many disadvantages:
+     * since we use 2 MiB pages we have to be careful as the recursive
+       mapping behaves as if we used 4 KiB pages
+     * large TLB footprint (as we use 2 MiB pages)
+     * clunky to modify other process page table (the pdp becomes pml4,
+       ...) - rare, but needed for container creation
+     * weird bootstrap during boot
+     * limited to x86 (not big deal for now)
+
+  Duplicated Page Table
+
+   For kernel we have a separate page table data structure that mimics
+   hardware page table.
+
+   TODO: use completely different data layout so we can easily traverse
+   the pointers from pml4 -> pdp -> pd.
+
+   For user space we may use recursive mapping to allow easy view of the
+   current virtual address space. (TODO: what about TLB pollution? We
+   can't easily map page tables as they live in kernel heap)
+
+   Backing pages cannot be moved without knowledge of the heap to avoid
+   having a separate structure for page tables.
+
+  Initialization
+
+   Since at boot we do not have valid kernel page table we have to build
+   one. This build process is quite different from modifying current page
+   table (one in CR3) as we can't use recursive mapping to access the
+   pml4, pdp, and pd tables. Furthermore, since the page table is not used
+   we do not need to invalidate TLB entries.
+
+   We assume the heap is physically contiguous at the boot time and use a
+   simple offset to set up an initial kernel page table. Alternative
+   solution would be to walk the boot page directory and find linear
+   address for a given physical address (the other way is handled by
+   recursive mapping).
+
+  Manager
+
+   Still go back and forth between the options but leaning towards thin
+   layer in kernel which will provide frame management and ability to map
+   virtual to physical addresses. The kernel would enforce permissions.
+
+   The other options are:
+     * provide only sbrk to grow heap (pain in ass to map any specific
+       frames)
+     * allocate fixed number of frames for kernel and give rest of frames
+       to user space memory manager (two context switches, painful)
+     * I can't think of anything else
+
+  Security
+
+   If I go with 2 MiB pages then we need at least 4 pages for a program (8
+   MiB):
+     * .text, read-only
+     * .rodata, read-only, no-execute
+     * .data, write, no-execute
+     * stack, write, no-execute
+
+  Heap
+
+   I started with a simple heap that allocates multiple of 4096 bytes, but
+   after watching Switch hacking talk (TODO reference) I plan to have a
+   separate allocator (slab) for each struct type, thus avoiding the
+   potential vulnerability of overwriting different struct that can happen
+   on heap (as two different types of struct cannot possibly overlap, and
+   also there are no pointer casting issues).
+
+Security
+
+     * NX, WP, SMAP, SMEP
+     * guard pages
+     * proper permissions for elf segments
+     * capability-based permission (kernel/user are "same")
+
+   The UOS is an exo/micro kernel design and thus it runs all drivers in
+   user mode (ring 3).
+
+   Kernel is prohibited to write to any user mapped memory directly and
+   must map the memory to separate address if it wants to write there (for
+   example for the info structure). Kernel never reads any user memory and
+   all syscall arguments must be passed in registers.
+
+   The recent side-channel attacks are concerning and I will probably go
+   with the separate page table for kernel and user (with the addition of
+   PCID). The Spectre attack seems much harder to mitigate. The attack
+   allows user to read all memory, thus it could read kernel key and
+   corresponding capabilities created from the key. One solution would be
+   to store the keys and capabilities in a special area not mapped when
+   user space is in flight (probably hard to not make a mistake); could we
+   create kernel where everything is non-sensitive?
+
+   The UOS is an exo/micro kernel design and thus it runs all drivers in
+   user mode (ring 3).
+
+   Kernel is prohibited to write to any user mapped memory directly and
+   must map the memory to separate address if it wants to write there (for
+   example for the info structure). Kernel never reads any user memory and
+   all syscall arguments must be passed in registers.
+
+   The recent side-channel attacks are concerning and I will probably go
+   with the separate page table for kernel and user (with the addition of
+   PCID). The Spectre attack seems much harder to mitigate. The attack
+   allows user to read all memory, thus it could read kernel key and
+   corresponding capabilities created from the key. One solution would be
+   to store the keys and capabilities in a special area not mapped when
+   user space is in flight (probably hard to not make a mistake); could we
+   create kernel where everything is non-sensitive?
+
+  Capability-based Mechanisms
+
+   Instead of user/group based system have fine grained badges that give
+   owner permissions. Access control lists (ACL) store list of things that
+   have access to given resource. Capabilities store list of resources a
+   thing can access.
+
+   Strive for simplicity
+
+   c-list (capability list), trapdoor functions?
+
+   random numbers (rdrand, rdseed)
+
+   How to do these?
+     * capability per frame
+     * capability per core
+
+   Anton Burtsev, http://www.ics.uci.edu/~aburtsev/
+
+   References:
+     * https://www.cs.cornell.edu/courses/cs513/2007fa/L08.html
+     * https://software.intel.com/en-us/articles/intel-digital-random-numb
+       er-generator-drng-software-implementation-guide
+
+  Design
+
+   Initial approach was to have each frame associated with capability,
+   thus when a user allocated a frame they would get a key to it. This method
+   was clunky as it would be tricky to allocate multiple frames and it
+   placed large burden onto user. Furthermore, we were associating the
+   object number with the key to identify it, which caused same problems.
+
+   Instead, we use user provided key (can be random number) when a frame
+   is acquired, thus the same key can be used with any number of frames.
+   User can create capability by hashing the key and permissions. Then
+   when any access such as mapping the frame is performed, OS performs
+   same hashing and checks if the hash in capability matches the computed
+   one.
+
+   TODO: test if we can use AES instead of sha256
+
+Multicore
+
+     * ACPI table to get list of cores
+     * x2APIC (MSR)
+     * GS register
+     * one kernel stack per core
+     * create container in user space (aka fork)
+
+   There are at least two ways to detect Application Processors (AP) after
+   Bootstrap Processor (BSP) starts running the bootloader: Multiprocessor
+   Specification tables or ACPI (specifically MADT).
+
+   xv6 uses MPS tables which are obsolete, so we focus on the ACPI
+   approach. The ACPI table pointer structure lives at some memory range
+   and can be checksummed to verify if the struct is really an ACPI table.
+   Since Qemu does not support x2APIC we have to memory map the LAPIC
+   registers to query the APIC id (other alternative is to use CPUID). The
+   ACPI table tells us this address and moreover it has a flag if core is
+   working (the table is filled during boot by each core).
+
+   After parsing the tables we loop through all core entries and if the
+   LAPIC id is different from the id of BSP we attempt to start the core
+   with an inter-processor interrupt (IPI). Then we wait until the core updates the
+   shared core array with running flag. We could do exponential startup
+   for large core counts, but currently it is not an issue. Note, we use
+   x2APIC which uses MSR as an interface instead of memory mapping.
+
+   Each core has its own kernel stack and pml4 with shared entries for the
+   kernel data structures (including heap) and for the entry which holds
+   all core kernel stacks (TODO: should cores see other cores stacks?).
+
+User Mode
+
+     * params passed only in registers
+     * fast syscall/sysret
+     * low level (map memory, create container)
+
+Graphics
+
+   The simplest way to draw pixels is using VGA mode 0x13 (320x200 with
+   256 colors with linear addressing) with BIOS interrupt 0x10 in
+   protected mode. This mode switch is done in bootloader before protected
+   mode is entered as only real mode can sanely interact with BIOS to
+   manipulate VGA graphics.
+# two moves can be combined into movw $0x0013, %ax
+movb    $0x0, %ah  # call function 0x0 to set video mode
+movb    $0x13, %al # use video mode 0x13
+# or movw $0x13, %ax
+int     $0x10
+
+   After changing video mode, the pixel can be accessed at physical
+   address 0xA0000, which in xv6 is mapped at KERNBASE + 0xA0000.
+
+   Unfortunately, the small resolution (and only 256 colors) provided by
+   VGA is restrictive and unsuitable for any graphics based system.
+
+   The simplest solution is to use VESA BIOS Extensions (VBE) which can
+   provide more than 1kx1k resolutions with up to 16M colors. Even though
+   modes defined by VESA are considered deprecated, they are likely to
+   work and allow to avoid finding suitable mode. These predefined modes
+   come in handy as real mode needs to be used and xv6's boot code can't be
+   more than 512 bytes. Other solution would be to drop down to real mode
+   before switching to long mode (TODO).
+
+   For example, setting 1280x1024 with 16M colors and loading its
+   information.
+movw    $0x4F02, %ax    # set VBE mode function
+movw    $0x411B, %bx    # 1280x1024 24bit mode (with linear framebuffer)
+int     $0x10
+
+movw    $0x4F01, %ax    # get mode information function
+movw    $0x411B, %bx    # mode
+# store the mode information struct (256 bytes) at physical address 0x0
+movw    $0x0, %di
+int     $0x10
+
+   Then we can query mode parameters by loading the ModeInfoBlock
+   structure. For simplicity, we load it at fixed physical address 0x0 as
+   kernel code starts on address 0x100000. Since we care only about the
+   PhysBasePtr field (tells us location of linear framebuffer), we could
+   pass it as function parameter to the kernel bootstrap function.
+
+   Now, the tricky bit; we need to create virtual memory mapping to the
+   physical address in PhysBasePtr. On my machine in Qemu, the address is
+   0xFD000000 and we need to map 4 MiB (depends on resolution and bit
+   depth) up to the address 0xFE000000 (defined in memlayout.h as
+   DEVSPACE, i.e., memory mapped I/O).
+
+   I used mappages (vm.c) in function main (main.c) to map some fixed
+   virtual address in FRAMEBUFFER to physical address at vbe->physbaseptr
+   before starting other processors.
+struct vbemodeinfo const *const vbe = (void *)KERNBASE;
+int mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm);
+mappages(kpgdir, (void *)FRAMEBUFFER, DEVSPACE - FRAMEBUFFER, vbe->physbaseptr,
+PTE_W);
+
+   Now, pixels can be written at the address FRAMEBUFFER anywhere in
+   kernel. I was surprised I could not find any exported function to
+   perform the memory mapping explicitly, another reason for exokernel.
+
+Floating Points
+
+   By default disabled on amd64
+
+   We can test in bootloader first if AVX2 is supported, and if not hang
+   the OS. Some OS will trap if user used floating point unit and enable
+   it. This mechanism allows OS to avoid storing/restoring floating point
+   registers during context switch if process does not use those
+   registers. Since UOS kernel does not save context at all (except few
+   registers on timer interrupt), it enables AVX2 at the kernel startup
+   and lets the user do register store/restore if necessary. In fact, this
+   approach is simpler as user knows what registers are being used and can
+   do some tricks. Moreover, since kernel does not use floating point
+   registers, user can pass values in registers to the next scheduled
+   container.
+
+Kernel
+
+   We map kernel at high virtual address as user space programs assume
+   loading at address 0.
+
+  Global Descriptor Table
+
+   The GDT from bootloader has only kernel code segment and no user code
+   segment, nor Task State Segment (TSS). Initially, we had one GDT shared
+   by all cores, but each core needs its own TSS, thus we could either
+   create an entry for each core, or better, duplicate whole GDT to obtain
+   full flexibility at mere cost of extra 32 bytes per core. Keeping the
+   GDT separate is the right step into the direction of sharing as little
+   as possible between cores to scale well on many core architectures (>
+   32 cores).
+
+  Memory Layout
+
+   UOS's kernel does not remap physical memory contiguously (higher half)
+   as xv6 does. Instead, the kernel uses the same interfaces as user
+   programs, allocating frame and mapping it. In linear address space, the
+   kernel lives at entry 511 (last) in PML4 page table and thus takes 512
+   GB. We chose to take whole entry for simplicity as it can be easily
+   shared by all user programs. Moreover, we mark the entry as global to
+   avoid flushing TLB on context switch.
+
+   Each core's stack lives in entry 510 and is again marked global. Stacks
+   are one page large (2 MiB), and have guard pages interspersed between
+   the stacks. We do not have kernel stack per container as there are no
+   slow system calls in the kernel (such as IO) that would need to be
+   interrupted. This push of expensive operations to user space makes
+   kernel simpler and easier to reason about.
+
+  Linking
+
+Containers
+
+   Instead of notion of a traditional UNIX process, UOS uses containers as
+   its execution unit abstraction. A container is akin to a process, but
+   it also has access to exceptions, low-level memory management, and
+   scheduling. For example, user can easily implement preemptive threads
+   inside a container.
+
+   In short, container has following entries:
+     * unique identifier
+     * PML4 pointer to a page table
+     * frame for storing rsp and rip during system call
+     * status (running, ...)
+     * penalty (increased on each tick, decreased on yield)
+
+   In contrast to xv6, we do not have any scheduler thread, so the kernel
+   thread becomes a container after bootstrap and if container is killed,
+   next container is scheduled (or in lack of container kernel panics).
+
+User Space Bootstrap
+
+   The initial user mode binary is embedded into the kernel as there is no
+   disk driver in kernel. All binaries use custom link script so there are
+   only .text, .rodata, and .data, and they have proper permissions so
+   they can be loaded into separate pages.
+
+   The ELF file is parsed for timer interrupt handler which needs to be
+   passed to function allocating quantum (so it will be scheduled to run).
+
+System Calls
+
+   xv6 uses interrupt 0x40 (T_SYSCALL) for system calls. We use the native
+   syscall/sysret instructions present on amd64 architectures specifically
+   designed for system calls.
+
+   Since the syscall instruction uses rcx register to store the returning
+   instruction pointer we copy the value in the register to caller saved
+   register and restore it back in the syscall to preserve C calling
+   convention.
+
+   We may need to move away from C calling convention due to the need of
+   having more than 6 function arguments for the syscall (7-th is on the
+   stack and UOS is prohibited from touching any user memory).
+
+   UOS relies on a C calling convention for system calls, but it must
+   clear caller saved registers before returning to the user as it could
+   leak information from the kernel (e.g., private key).
+
+Entering User Mode
+
+   Since amd64 has no "direct" instruction to switch to user mode we have
+   to pretend that the container is returning either from interrupt
+   (iretq) or system call (sysretq). We used the system call instruction as
+   it does not touch user's stack, thus we can avoid writing to user's
+   memory (which we avoid for security reasons).
+
+Traps and Interrupts
+
+   256 slots total.
+
+   Must be acknowledged
+
+   Priorities, higher is handled first
+
+   TODO: do software exceptions need to be acknowledged?
+
+   Needs one Task State Segment (TSS)
+
+   User space exception list:
+     * #DE - divide by zero (vector 0)
+     * #PF - page fault (vector 14) - TODO: user can't read CR2; should
+       kernel push CR2?
+     * #XF - SIMD floating point (vector 19)
+
+   User space rare exceptions (which we let double fault):
+     * #SS - stack exception (vector 12) - TODO: detects canonical address
+       issue
+     * #UD - invalid opcode (vector 6) - TODO: mostly legacy instructions
+       and very rare
+     * #GP - general protection (vector 13) - TODO: mostly for unaligned
+       SSE
+     * #AC - alignment check (vector 17) - can happen only if CPL = 3
+
+   Kernel space exception list:
+     * #DF - double fault (vector 8)
+     * #MC - machine check (vector 18)
+
+   Legacy exceptions:
+     * Non-maskable interrupt (vector 2)
+     * #OF - overflow (vector 4)
+     * #BR - bound range (vector 5)
+     * #NM - device not available (vector 7)
+     * #TS - invalid TSS (vector 10)
+     * #NP - segment not present (vector 11)
+     * #MF - x87 floating point (vector 16)
+
+  Story
+
+   Thanks to #osdev for help.
+
+   After implementing divide by zero exception the kernel started rebooting.
+   Changing the .text segment to writable stopped the restarting and
+   seemingly fixed the problem. However, exception should not modify the
+   .text segment so after verifying the IDT is correct I had to check what
+   changed in .text segment.
+
+   I copied all .text data and ran a comparison after the fault happened.
+   The mismatch was found:
+mismatch at FFFFFF80000000C0: 20990000000000 20980000000000
+
+   Which referred to GDT code segment entry and specifically the accessed
+   bit got flipped. The reason is that during fault the code segment is
+   loaded, but for syscall/sysret it is not, so it was confusing.
+
+   For now, the solution was to move non-bootstrap GDT to .data segment.
+   NOTE: bootstrap GDT is in .text segment and thus the page for starting
+   APs must be writable.
+
+  Division by zero
+
+   When I asked on IRC about advancing RIP after division by zero I got
+   this response :).
+
+   It's *possible*, but it's also likely to break dozens of known good
+   assumptions about computer science and invoke a metric shitload of
+   undefined behaviour in the process (Kazinsal #osdev)
+
+  Timer Interrupt
+
+   So far, all non-critical exceptions were handled by currently running
+   user mode container. Unfortunately, I am not sure how to make the LAPIC
+   timer in user mode as it requires writes to a MSR to reset it (TSC
+   deadline), and more importantly kernel has to relinquish control in
+   case of adversary/buggy container that is reluctant to yield on the
+   interrupt.
+
+   Initially, I thought setting up two timers, one at higher frequency
+   (user mode) and the other at lower frequency (kernel mode) would be
+   neat, but LAPIC has only one timer interrupt vector. Thus, the current
+   plan is to point the interrupt vector to kernel which will bump up
+   penalty integer (if next quantum is a different container) and returns
+   to present container. Since the registers on context switch are saved
+   by containers, kernel must (I think) use either user mode stack (can be
+   read-only) or registers to at minimum save current RIP of the user
+   container before jumping to the handler. If sysretq is used, RCX and
+   R11 must be saved on user stack in addition, thus to better mimic
+   traditional exception, a user read-only stack seems like a better
+   solution (there is the GS/FS register too, but I rather have user
+   complete freedom and not reserve any registers - probably makes it more
+   portable too). At the end, I decided to create read-only container
+   environment (basically, shared memory with kernel) that can be used to
+   share information such as, the current penalty count, last RIP (on
+   interrupt), next quantum container id, and other different kernel
+   counters for active container. This area requires one frame (2 MiB) and
+   is mapped as write into kernel, and as read into user. The policy of
+   kernel never reading any user mode memory (only registers are allowed)
+   is still enforced.
+
+   After kernel returns to the user mode interrupt handler, the container
+   is expected to save its context if it is necessary and yield. There are
+   exceptions, for example if the next quantum is the same container,
+   there is no need to save anything nor yield, the container can restore
+   the stack pointer and jump back to the saved or different RIP. This
+   mechanism can be used to implement user space threads by simply
+   allocating few consecutive quanta (such as 10, each 1 ms long) and
+   yield only on the last one, so the kernel can switch to different
+   container.
+
+   Normally, I use null segment for SS, but when returning from CPL < 3
+   into CPL = 3 via iretq, the SS will get reloaded and will cause general
+   protection fault. Thus a not null selector must be created in GDT and
+   SS must be loaded with that selector index. We use a simple trick where
+   SS gets loaded with proper selector after executing sysretq instruction
+   (starts user mode container). It is possible to use sysretq, but then
+   two extra registers need to be stored in info (rcx and r11); with
+   iretq we need to store only rip. For user space exceptions we do not
+   follow the manual and avoid iretq as it would set the accessed bit in
+   GDT entry and page fault. (TODO: manual explicitly requires to use
+   iretq)
+
+   TODO: handle what happens if container's interrupt handler gets
+   interrupted (I am thinking about unique ID written by kernel into info
+   as RIP can't be used reliably to identify interrupts). The container
+   should be penalized (and eventually killed), but more importantly,
+   there must be a plan for handling the user space stack in sane way (and
+   not losing some space on it, due to RSP being bumped in handler, but not
+   decremented).
+
+   Protocol:
+    1. interrupt jumps into kernel handler
+    2. if current container penalty is 3, kill it and schedule next
+       container
+    3. increase penalty
+    4. update RIP in the info
+    5. set next LAPIC deadline
+    6. jump into user handler
+    7. user is expected to save registers and yield
+    8. decrease penalty (and if negative set to 0)
+    9. schedule next container
+
+   Notice, the penalty indicates if there is any nesting in timer
+   interrupt. A well behaving program should always have penalty equal 1
+   when entering timer interrupt handler.
+
+   Since the user mode bootstrap container must handle timer interrupt, we
+   parse the ELF symbol table and search for timer_interrupt_asm assembly
+   function. This function is then called by kernel timer interrupt
+   handler.
+
+   The kernel interrupt handler is fully written in assembly to avoid any
+   surprises in terms of register modifications. The current
+   implementation needs to push and pop only 3 registers, RAX, RCX, and
+   RDX.
+
+Compile Flags
+
+   -mno-red-zone (disable red zone as interrupts do not respect convention
+   of leaving extra stack space for leaf functions)
+
+Tasks
+
+     * tests
+          + [ ] frame allocator
+          + [ ] sha256
+     * style
+          + [X] header only
+          + [ ] Intel-style assembly
+     * bootloader
+          + [X] load rest of bootloader with BIOS (sort of 2 stage)
+          + [X] switch from real mode to long mode
+          + [ ] query video modes and save this information at fixed
+            address
+          + [ ] switch to most suitable video mode
+          + [X] load ELF binary containing kernel (uses IDE)
+          + [ ] switch to SATA?
+          + [ ] boot on real hardware (needs SATA)
+     * kernel
+          + [X] make work on KVM (needed to disable SMAP on Haswell)
+          + [?] add performance counters to evaluate impact of recursive
+            paging on TLB misses
+               o abandoned recursive paging due to clunky mapping of other
+                 page tables
+          + [X] start application processors (AP)
+          + [X] create kernel process for each application processor (AP)
+          + [X] add spinlock (test it by cores racing on it)
+          + [X] use RDGSBASE to get pointer to core structure instead of
+            the current offset stuff
+          + [X] check required feature set with CPUID
+          + [ ] SKINIT instruction (read Security section in manual)
+          + [ ] restructure so the end is at 16 MiB boundary
+          + [ ] avoid global variables
+          + [ ] use ILP64 mode (sizeof (int) == 8) - seems hard as there
+            is no compiler support
+          + [X] timer (local APIC; TSC-Deadline mode)
+          + [X] kernel double fault handler
+          + [X] enable SCE (system call extensions) in EFER
+          + [X] add system call support (syscall)
+          + [ ] syscall user rip security (validate it is canonical?)
+          + [X] add containers (similar to process abstraction)
+          + [X] load user space bootstrap ELF embedded inside kernel
+          + [X] page permissions for ELF segments
+          + [X] parse ELF symbol table to find timer interrupt handler
+          + [X] switch to user space (via sysretq)
+          + [X] use x2APIC (through MSR) instead of old xAPIC
+          + [X] wait 10 ms on AP startup
+          + [ ] IPC (async to avoid deadlocks...)
+          + [ ] AVX2
+          + [X] one TSS
+          + [ ] sleep idle cores (hlt instructions)
+          + [ ] expose some data structures (and update during task
+            switch?)
+          + [ ] Continuation style system (TODO: read papers on it)
+          + [ ] init system (so servers can reserve resources)
+          + [ ] struct type for physical addresses (what about linear
+            addresses? always a pointer?)
+          + [X] add HPET support (to compute TSC rate)
+          + [ ] compute TSC rate for each core independently (good idea?)
+          + [ ] destroy container on double fault or other error
+     * memory management (only 2 MiB pages, thus only PML4, PDP, and PD is
+       used)
+          + [X] boot page table
+          + [X] kernel page table
+          + [X] recursive PML4 (no longer used for kernel space)
+          + [X] frame allocator
+          + [X] page allocator
+          + [X] heap allocator (only alloc, no free)
+          + [X] duplicated page table (allows easy traversal of any page
+            table - even inactive one)
+          + [X] page mapping interface (global marked pages for kernel to
+            avoid TLB invalidation; write protect bit CR0)
+          + [ ] alignment checking in user space?
+          + [X] Supervisor-Mode Execution Prevention (CR4.SMEP)
+          + [X] non-executable pages (NXE in EFER)
+          + [ ] TLB (tagged)
+          + [X] shared memory (uses capabilities)
+          + [ ] replace heap with slab allocator
+     * scheduling
+          + [ ] penalty protocol for wonky containers
+          + [ ] round-robin scheduler
+          + [ ] container migration to other core
+     * syscalls
+          + [X] log to UART
+          + [ ] frame alloc/free
+          + [ ] map/unmap
+          + [X] alloc/free container
+          + [X] quanta alloc, yield
+          + [X] userspace register store
+     * debugging
+          + [X] setup .gdbinit for automatic GDB startup
+     * security
+          + [ ] separate page table for user and kernel (KAISER)
+          + [X] use guard page for each core kernel stack
+          + [X] proper page flags for .text, .rodata, .data
+          + [X] capability-based resource management
+          + [ ] separate memory pools for each struct type (attacker can
+            only modify same type of struct)
+          + [ ] MTRR (prevent DMA to write into kernel memory)
+          + [ ] kernel ASLR (both linear and physical address; PIC needed)
+          + [ ] random location for user stack
+          + [ ] finer granularity permissions for capabilities
+          + [ ] check if AES can be used instead of sha256
+          + [ ] namespaces?
+          + [ ] string type with length?
+          + [ ] stack protection
+          + [ ] do not map kernel into user space? (recent patches in NT
+            and Linux do it)
+     * libs
+          + [X] sprintf
+          + [X] sha256 routine
+          + [X] random numbers (via rdrand instruction)
+          + [X] strcmp
+     * user space
+          + [X] simple game of life render test
+          + [X] exceptions (divide by zero, page fault, ...)
+          + [X] timer interrupt
+          + [ ] compositor
+     * ACPI
+          + [X] parse MADT for detecting CPUs
+          + [X] build parsed struct with LAPICs
+          + [ ] parse x2APIC (Qemu unfortunately has ACPI 1.0 only)
+     * drivers
+          + [ ] SATA (high priority)
+          + [X] basic serial port
+          + [X] IDE
+          + [ ] keyboard (needs USB stack)
+          + [ ] mouse (needs USB stack)
+          + [ ] Intel GPU driver (blitting support)
+     * graphics
+          + [ ] GUI library
+          + [ ] tiling manager
+     * file system
+          + [ ] sector-based addressing (similar to page frames)
+          + [ ] memory mapped filesystem?
+          + [ ] some filesystem format
+     * networking (not likely before Christmas)
+          + [ ] ethernet driver
+          + [ ] packet filter (downloadable?)
+     * JIT (use wasm or some other typesafe IR and thus run everything at
+       ring 0)
+          + [X] interpreter of a subset of WASM
+
+References
+
+     * Exokernel: An Operating System Architecture for Application-Level
+       Resource Management
+     * Singularity: Rethinking the Software Stack
+     * Amoeba: a distributed operating system for the 1990s
+     * Scheduler Activations: Effective Kernel Support for the User-Level
+       Management of Parallelism
+     * A Comparison of Scheduling Algorithms for Multiprocessors
+     * L4 Microkernels: The Lessons from 20 Years of Research and
+       Deployment
+     * xv6, JOS, seL4
+     * osdev wiki
+     * Operating Systems Design and Implementation
+     * AMD64 Volume 1, 2, 3
+     * ATA Interface Reference Manual
+     * Advanced Configuration and Power Interface (ACPI) Specification
+     * ELF-64 Object File Format
+     * VESA BIOS EXTENSION (VBE) Core Functions Standard
+     * [3]The little book about OS development
+     * [4]Reading privileged memory with a side-channel
+     * [5]Kaiser
+     * [6]Hyperkernel: Push-Button Verification of an OS Kernel
+     * [7]Dune: Safe User-level Access to Privileged CPU Features
+     * Modern Operating Systems - 3rd edition, A. Tanenbaum
+
+References
+
+   1. https://klacansky.com/notes/data/useless-os.zip
+   2. http://wiki.osdev.org/ATA_PIO_Mode
+   3. https://littleosbook.github.io/
+   4. https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
+   5. https://klacansky.com/notes/reference/uos/kaiser.pdf
+   6. https://klacansky.com/notes/reference/uos/hyperkernel.pdf
+   7. https://klacansky.com/notes/reference/uos/dune.pdf
diff --git a/doc/littleosbook.github.io.txt b/doc/littleosbook.github.io.txt
new file mode 100644
index 0000000..4600b11
--- /dev/null
+++ b/doc/littleosbook.github.io.txt
@@ -0,0 +1,3261 @@
+The little book about OS development
+
+Erik Helin, Adam Renberg
+
+   [1]2015-01-19 | Commit: fe83e27dab3c39930354d2dea83f6d4ee2928212 [2]PDF
+   version
+
+Contents
+
+     * [3]1 Introduction
+          + [4]1.1 About the Book
+          + [5]1.2 The Reader
+          + [6]1.3 Credits, Thanks and Acknowledgements
+          + [7]1.4 Contributors
+          + [8]1.5 Changes and Corrections
+          + [9]1.6 Issues and where to get help
+          + [10]1.7 License
+     * [11]2 First Steps
+          + [12]2.1 Tools
+               o [13]2.1.1 Quick Setup
+               o [14]2.1.2 Programming Languages
+               o [15]2.1.3 Host Operating System
+               o [16]2.1.4 Build System
+               o [17]2.1.5 Virtual Machine
+          + [18]2.2 Booting
+               o [19]2.2.1 BIOS
+               o [20]2.2.2 The Bootloader
+               o [21]2.2.3 The Operating System
+          + [22]2.3 Hello Cafebabe
+               o [23]2.3.1 Compiling the Operating System
+               o [24]2.3.2 Linking the Kernel
+               o [25]2.3.3 Obtaining GRUB
+               o [26]2.3.4 Building an ISO Image
+               o [27]2.3.5 Running Bochs
+          + [28]2.4 Further Reading
+     * [29]3 Getting to C
+          + [30]3.1 Setting Up a Stack
+          + [31]3.2 Calling C Code From Assembly
+               o [32]3.2.1 Packing Structs
+          + [33]3.3 Compiling C Code
+          + [34]3.4 Build Tools
+          + [35]3.5 Further Reading
+     * [36]4 Output
+          + [37]4.1 Interacting with the Hardware
+          + [38]4.2 The Framebuffer
+               o [39]4.2.1 Writing Text
+               o [40]4.2.2 Moving the Cursor
+               o [41]4.2.3 The Driver
+          + [42]4.3 The Serial Ports
+               o [43]4.3.1 Configuring the Serial Port
+               o [44]4.3.2 Configuring the Line
+               o [45]4.3.3 Configuring the Buffers
+               o [46]4.3.4 Configuring the Modem
+               o [47]4.3.5 Writing Data to the Serial Port
+               o [48]4.3.6 Configuring Bochs
+               o [49]4.3.7 The Driver
+          + [50]4.4 Further Reading
+     * [51]5 Segmentation
+          + [52]5.1 Accessing Memory
+          + [53]5.2 The Global Descriptor Table (GDT)
+          + [54]5.3 Loading the GDT
+          + [55]5.4 Further Reading
+     * [56]6 Interrupts and Input
+          + [57]6.1 Interrupts Handlers
+          + [58]6.2 Creating an Entry in the IDT
+          + [59]6.3 Handling an Interrupt
+          + [60]6.4 Creating a Generic Interrupt Handler
+          + [61]6.5 Loading the IDT
+          + [62]6.6 Programmable Interrupt Controller (PIC)
+          + [63]6.7 Reading Input from the Keyboard
+          + [64]6.8 Further Reading
+     * [65]7 The Road to User Mode
+          + [66]7.1 Loading an External Program
+               o [67]7.1.1 GRUB Modules
+          + [68]7.2 Executing a Program
+               o [69]7.2.1 A Very Simple Program
+               o [70]7.2.2 Compiling
+               o [71]7.2.3 Finding the Program in Memory
+               o [72]7.2.4 Jumping to the Code
+          + [73]7.3 The Beginning of User Mode
+     * [74]8 A Short Introduction to Virtual Memory
+          + [75]8.1 Virtual Memory Through Segmentation?
+          + [76]8.2 Further Reading
+     * [77]9 Paging
+          + [78]9.1 Why Paging?
+          + [79]9.2 Paging in x86
+               o [80]9.2.1 Identity Paging
+               o [81]9.2.2 Enabling Paging
+               o [82]9.2.3 A Few Details
+          + [83]9.3 Paging and the Kernel
+               o [84]9.3.1 Reasons to Not Identity Map the Kernel
+               o [85]9.3.2 The Virtual Address for the Kernel
+               o [86]9.3.3 Placing the Kernel at 0xC0000000
+               o [87]9.3.4 Higher-half Linker Script
+               o [88]9.3.5 Entering the Higher Half
+               o [89]9.3.6 Running in the Higher Half
+          + [90]9.4 Virtual Memory Through Paging
+          + [91]9.5 Further Reading
+     * [92]10 Page Frame Allocation
+          + [93]10.1 Managing Available Memory
+               o [94]10.1.1 How Much Memory is There?
+               o [95]10.1.2 Managing Available Memory
+          + [96]10.2 How Can We Access a Page Frame?
+          + [97]10.3 A Kernel Heap
+          + [98]10.4 Further reading
+     * [99]11 User Mode
+          + [100]11.1 Segments for User Mode
+          + [101]11.2 Setting Up For User Mode
+          + [102]11.3 Entering User Mode
+          + [103]11.4 Using C for User Mode Programs
+               o [104]11.4.1 A C Library
+          + [105]11.5 Further Reading
+     * [106]12 File Systems
+          + [107]12.1 Why a File System?
+          + [108]12.2 A Simple Read-Only File System
+          + [109]12.3 Inodes and Writable File Systems
+          + [110]12.4 A Virtual File System
+          + [111]12.5 Further Reading
+     * [112]13 System Calls
+          + [113]13.1 Designing System Calls
+          + [114]13.2 Implementing System Calls
+          + [115]13.3 Further Reading
+     * [116]14 Multitasking
+          + [117]14.1 Creating New Processes
+          + [118]14.2 Cooperative Scheduling with Yielding
+          + [119]14.3 Preemptive Scheduling with Interrupts
+               o [120]14.3.1 Programmable Interval Timer
+               o [121]14.3.2 Separate Kernel Stacks for Processes
+               o [122]14.3.3 Difficulties with Preemptive Scheduling
+          + [123]14.4 Further Reading
+
+1 Introduction
+
+   This text is a practical guide to writing your own x86 operating
+   system. It is designed to give enough help with the technical details
+   while at the same time not reveal too much with samples and code
+   excerpts. We've tried to collect parts of the vast (and often
+   excellent) expanse of material and tutorials available, on the web and
+   otherwise, and add our own insights into the problems we encountered
+   and struggled with.
+
+   This book is not about the theory behind operating systems, or how any
+   specific operating system (OS) works. For OS theory we recommend the
+   book Modern Operating Systems by Andrew Tanenbaum [1]. Lists and
+   details on current operating systems are available on the Internet.
+
+   The starting chapters are quite detailed and explicit, to quickly get
+   you into coding. Later chapters give more of an outline of what is
+   needed, as more and more of the implementation and design becomes up to
+   the reader, who should now be more familiar with the world of kernel
+   development. At the end of some chapters there are links for further
+   reading, which might be interesting and give a deeper understanding of
+   the topics covered.
+
+   In [124]chapter 2 and [125]3 we set up our development environment and
+   boot up our OS kernel in a virtual machine, eventually starting to
+   write code in C. We continue in [126]chapter 4 with writing to the
+   screen and the serial port, and then we dive into segmentation in
+   [127]chapter 5 and interrupts and input in [128]chapter 6.
+
+   After this we have a quite functional but bare-bones OS kernel. In
+   [129]chapter 7 we start the road to user mode applications, with
+   virtual memory through paging ([130]chapter 8 and [131]9), memory
+   allocation ([132]chapter 10), and finally running a user application in
+   [133]chapter 11.
+
+   In the last three chapters we discuss the more advanced topics of file
+   systems ([134]chapter 12), system calls ([135]chapter 13), and
+   multitasking ([136]chapter 14).
+
+1.1 About the Book
+
+   The OS kernel and this book were produced as part of an advanced
+   individual course at the Royal Institute of Technology [2], Stockholm.
+   The authors had previously taken courses in OS theory, but had only
+   minor practical experience with OS kernel development. In order to get
+   more insight and a deeper understanding of how the theory from the
+   previous OS courses works out in practice, the authors decided to
+   create a new course, which focused on the development of a small OS.
+   Another goal of the course was writing a thorough tutorial on how to
+   develop a small OS basically from scratch, and this short book is the
+   result.
+
+   The x86 architecture is, and has been for a long time, one of the most
+   common hardware architectures. It was not a difficult choice to use the
+   x86 architecture as the target of the OS, with its large community,
+   extensive reference material and mature emulators. The documentation
+   and information surrounding the details of the hardware we had to work
+   with was not always easy to find or understand, despite (or perhaps due
+   to) the age of the architecture.
+
+   The OS was developed in about six weeks of full-time work. The
+   implementation was done in many small steps, and after each step the OS
+   was tested manually. By developing in this incremental and iterative
+   way, it was often easier to find any bugs that were introduced, since
+   only a small part of the code had changed since the last known good
+   state of the code. We encourage the reader to work in a similar way.
+
+   During the six weeks of development, almost every single line of code
+   was written by the authors together (this way of working is also called
+   pair-programming). It is our belief that we managed to avoid a lot of
+   bugs due to this style of development, but this is hard to prove
+   scientifically.
+
+1.2 The Reader
+
+   The reader of this book should be comfortable with UNIX/Linux, systems
+   programming, the C language and computer systems in general (such as
+   hexadecimal notation [3]). This book could be a way to get started
+   learning those things, but it will be more difficult, and developing an
+   operating system is already challenging on its own. Search engines and
+   other tutorials are often helpful if you get stuck.
+
+1.3 Credits, Thanks and Acknowledgements
+
+   We'd like to thank the OSDev community [4] for their great wiki and
+   helpful members, and James Molloy for his eminent kernel development
+   tutorial [5]. We'd also like to thank our supervisor Torbjörn Granlund
+   for his insightful questions and interesting discussions.
+
+   Most of the CSS formatting of the book is based on the work by Scott
+   Chacon for the book Pro Git, [137]http://progit.org/.
+
+1.4 Contributors
+
+   We are very grateful for the patches that people send us. The following
+   users have all contributed to this book:
+     * [138]alexschneider
+     * [139]Avidanborisov
+     * [140]nirs
+     * [141]kedarmhaswade
+     * [142]vamanea
+     * [143]ansjob
+
+1.5 Changes and Corrections
+
+   This book is hosted on Github - if you have any suggestions, comments
+   or corrections, just fork the book, write your changes, and send us a
+   pull request. We'll happily incorporate anything that makes this book
+   better.
+
+1.6 Issues and where to get help
+
+   If you run into problems while reading the book, please check the
+   issues on Github for help:
+   [144]https://github.com/littleosbook/littleosbook/issues.
+
+1.7 License
+
+   All content is under the Creative Commons Attribution Non Commercial
+   Share Alike 3.0 license,
+   [145]http://creativecommons.org/licenses/by-nc-sa/3.0/us/. The code
+   samples are in the public domain - use them however you want.
+   References to this book are always received with warmth.
+
+2 First Steps
+
+   Developing an operating system (OS) is no easy task, and the question
+   "How do I even begin to solve this problem?" is likely to come up
+   several times during the course of the project for different problems.
+   This chapter will help you set up your development environment and
+   boot a very small (and primitive) operating system.
+
+2.1 Tools
+
+2.1.1 Quick Setup
+
+   We (the authors) have used Ubuntu [6] as the operating system for doing
+   OS development, running it both physically and virtually (using the
+   virtual machine VirtualBox [7]). A quick way to get everything up and
+   running is to use the same setup as we did, since we know that these
+   tools work with the samples provided in this book.
+
+   Once Ubuntu is installed, either physical or virtual, the following
+   packages should be installed using apt-get:
+    sudo apt-get install build-essential nasm genisoimage bochs bochs-sdl
+
+2.1.2 Programming Languages
+
+   The operating system will be developed using the C programming language
+   [8][9], using GCC [10]. We use C because developing an OS requires a
+   very precise control of the generated code and direct access to memory.
+   Other languages that provide the same features can also be used, but
+   this book will only cover C.
+
+   The code will make use of one type attribute that is specific for GCC:
+    __attribute__((packed))
+
+   This attribute allows us to ensure that the compiler uses a memory
+   layout for a struct exactly as we define it in the code. This is
+   explained in more detail in the next chapter.
+
+   Due to this attribute, the example code might be hard to compile using
+   a C compiler other than GCC.
+
+   For writing assembly code, we have chosen NASM [11] as the assembler,
+   since we prefer NASM's syntax over GNU Assembler.
+
+   Bash [12] will be used as the scripting language throughout the book.
+
+2.1.3 Host Operating System
+
+   All the code examples assume that the code is being compiled on a
+   UNIX-like operating system. All code examples have been successfully
+   compiled using Ubuntu [6] versions 11.04 and 11.10.
+
+2.1.4 Build System
+
+   Make [13] has been used when constructing the Makefile examples.
+
+2.1.5 Virtual Machine
+
+   When developing an OS it is very convenient to be able to run your code
+   in a virtual machine instead of on a physical computer, since starting
+   your OS in a virtual machine is much faster than getting your OS onto a
+   physical medium and then running it on a physical machine. Bochs [14]
+   is an emulator for the x86 (IA-32) platform which is well suited for OS
+   development due to its debugging features. Other popular choices are
+   QEMU [15] and VirtualBox [7]. This book uses Bochs.
+
+   By using a virtual machine we cannot ensure that our OS works on real,
+   physical hardware. The environment simulated by the virtual machine is
+   designed to be very similar to its physical counterpart, and the OS
+   can be tested on one by just copying the executable to a CD and finding
+   a suitable machine.
+
+2.2 Booting
+
+   Booting an operating system consists of transferring control along a
+   chain of small programs, each one more "powerful" than the previous
+   one, where the operating system is the last "program". See the
+   following figure for an example of the boot process:
+   An example of the boot process. Each box is a program.
+
+   An example of the boot process. Each box is a program.
+
+2.2.1 BIOS
+
+   When the PC is turned on, the computer will start a small program that
+   adheres to the Basic Input Output System (BIOS) [16] standard. This
+   program is usually stored on a read only memory chip on the motherboard
+   of the PC. The original role of the BIOS program was to export some
+   library functions for printing to the screen, reading keyboard input
+   etc. Modern operating systems do not use the BIOS' functions, they use
+   drivers that interact directly with the hardware, bypassing the BIOS.
+   Today, BIOS mainly runs some early diagnostics (power-on-self-test) and
+   then transfers control to the bootloader.
+
+2.2.2 The Bootloader
+
+   The BIOS program will transfer control of the PC to a program called a
+   bootloader. The bootloader's task is to transfer control to us, the
+   operating system developers, and our code. However, due to some
+   restrictions[146]^1 of the hardware and because of backward
+   compatibility, the bootloader is often split into two parts: the first
+   part of the bootloader will transfer control to the second part, which
+   finally gives control of the PC to the operating system.
+
+   Writing a bootloader involves writing a lot of low-level code that
+   interacts with the BIOS. Therefore, an existing bootloader will be
+   used: the GNU GRand Unified Bootloader (GRUB) [17].
+
+   Using GRUB, the operating system can be built as an ordinary ELF [18]
+   executable, which will be loaded by GRUB into the correct memory
+   location. The compilation of the kernel requires that the code is laid
+   out in memory in a specific way (how to compile the kernel will be
+   discussed later in this chapter).
+
+2.2.3 The Operating System
+
+   GRUB will transfer control to the operating system by jumping to a
+   position in memory. Before the jump, GRUB will look for a magic number
+   to ensure that it is actually jumping to an OS and not some random
+   code. This magic number is part of the multiboot specification [19]
+   which GRUB adheres to. Once GRUB has made the jump, the OS has full
+   control of the computer.
+
+2.3 Hello Cafebabe
+
+   This section will describe how to implement the smallest possible OS
+   that can be used together with GRUB. The only thing the OS will do is
+   write 0xCAFEBABE to the eax register (most people would probably not
+   even call this an OS).
+
+2.3.1 Compiling the Operating System
+
+   This part of the OS has to be written in assembly code, since C
+   requires a stack, which isn't available (the chapter [147]"Getting to
+   C" describes how to set one up). Save the following code in a file
+   called loader.s:
+    global loader                   ; the entry symbol for ELF
+
+    MAGIC_NUMBER equ 0x1BADB002     ; define the magic number constant
+    FLAGS        equ 0x0            ; multiboot flags
+    CHECKSUM     equ -MAGIC_NUMBER  ; calculate the checksum
+                                    ; (magic number + checksum + flags should
+                                    ; equal 0)
+
+    section .text:                  ; start of the text (code) section
+    align 4                         ; the code must be 4 byte aligned
+        dd MAGIC_NUMBER             ; write the magic number to the machine
+                                    ; code,
+        dd FLAGS                    ; the flags,
+        dd CHECKSUM                 ; and the checksum
+
+    loader:                         ; the loader label (defined as entry point
+                                    ; in linker script)
+        mov eax, 0xCAFEBABE         ; place the number 0xCAFEBABE in the
+                                    ; register eax
+    .loop:
+        jmp .loop                   ; loop forever
+
+   The only thing this OS will do is write the very specific number
+   0xCAFEBABE to the eax register. It is very unlikely that the number
+   0xCAFEBABE would be in the eax register if the OS did not put it there.
+
+   The file loader.s can be compiled into a 32-bit ELF [18] object file
+   with the following command:
+    nasm -f elf32 loader.s
+
+2.3.2 Linking the Kernel
+
+   The code must now be linked to produce an executable file, which
+   requires some extra thought compared to when linking most programs. We
+   want GRUB to load the kernel at a memory address larger than or equal
+   to 0x00100000 (1 megabyte (MB)), because addresses lower than 1 MB are
+   used by GRUB itself, BIOS and memory-mapped I/O. Therefore, the
+   following linker script is needed (written for GNU LD [20]):
+ENTRY(loader)                /* the name of the entry label */
+
+SECTIONS {
+    . = 0x00100000;          /* the code should be loaded at 1 MB */
+
+    .text ALIGN (0x1000) :   /* align at 4 KB */
+    {
+        *(.text)             /* all text sections from all files */
+    }
+
+    .rodata ALIGN (0x1000) : /* align at 4 KB */
+    {
+        *(.rodata*)          /* all read-only data sections from all files */
+    }
+
+    .data ALIGN (0x1000) :   /* align at 4 KB */
+    {
+        *(.data)             /* all data sections from all files */
+    }
+
+    .bss ALIGN (0x1000) :    /* align at 4 KB */
+    {
+        *(COMMON)            /* all COMMON sections from all files */
+        *(.bss)              /* all bss sections from all files */
+    }
+}
+
+   Save the linker script into a file called link.ld. The executable can
+   now be linked with the following command:
+    ld -T link.ld -melf_i386 loader.o -o kernel.elf
+
+   The final executable will be called kernel.elf.
+
+2.3.3 Obtaining GRUB
+
+   The GRUB version we will use is GRUB Legacy, since the OS ISO image can
+   then be generated on systems using both GRUB Legacy and GRUB 2. More
+   specifically, the GRUB Legacy stage2_eltorito bootloader will be used.
+   This file can be built from GRUB 0.97 by downloading the source from
+   [148]ftp://alpha.gnu.org/gnu/grub/grub-0.97.tar.gz. However, the
+   configure script doesn't work well with Ubuntu [21], so the binary file
+   can be downloaded from
+   [149]http://littleosbook.github.com/files/stage2_eltorito. Copy the
+   file stage2_eltorito to the folder that already contains loader.s and
+   link.ld.
+
+2.3.4 Building an ISO Image
+
+   The executable must be placed on a medium that can be loaded by a
+   virtual or physical machine. In this book we will use ISO [22] image
+   files as the medium, but one can also use floppy images, depending on
+   what the virtual or physical machine supports.
+
+   We will create the kernel ISO image with the program genisoimage. A
+   folder must first be created that contains the files that will be on
+   the ISO image. The following commands create the folder and copy the
+   files to their correct places:
+    mkdir -p iso/boot/grub              # create the folder structure
+    cp stage2_eltorito iso/boot/grub/   # copy the bootloader
+    cp kernel.elf iso/boot/             # copy the kernel
+
+   A configuration file menu.lst for GRUB must be created. This file tells
+   GRUB where the kernel is located and configures some options:
+    default=0
+    timeout=0
+
+    title os
+    kernel /boot/kernel.elf
+
+   Place the file menu.lst in the folder iso/boot/grub/. The contents of
+   the iso folder should now look like the following figure:
+    iso
+    |-- boot
+      |-- grub
+      | |-- menu.lst
+      | |-- stage2_eltorito
+      |-- kernel.elf
+
+   The ISO image can then be generated with the following command:
+    genisoimage -R                              \
+                -b boot/grub/stage2_eltorito    \
+                -no-emul-boot                   \
+                -boot-load-size 4               \
+                -A os                           \
+                -input-charset utf8             \
+                -quiet                          \
+                -boot-info-table                \
+                -o os.iso                       \
+                iso
+
+   For more information about the flags used in the command, see the
+   manual for genisoimage.
+
+   The ISO image os.iso now contains the kernel executable, the GRUB
+   bootloader and the configuration file.
+
+2.3.5 Running Bochs
+
+   Now we can run the OS in the Bochs emulator using the os.iso ISO image.
+   Bochs needs a configuration file to start and an example of a simple
+   configuration file is given below:
+    megs:            32
+    display_library: sdl
+    romimage:        file=/usr/share/bochs/BIOS-bochs-latest
+    vgaromimage:     file=/usr/share/bochs/VGABIOS-lgpl-latest
+    ata0-master:     type=cdrom, path=os.iso, status=inserted
+    boot:            cdrom
+    log:             bochslog.txt
+    clock:           sync=realtime, time0=local
+    cpu:             count=1, ips=1000000
+
+   You might need to change the path to romimage and vgaromimage depending
+   on how you installed Bochs. More information about the Bochs config
+   file can be found at Bochs' website [23].
+
+   If you saved the configuration in a file named bochsrc.txt then you can
+   run Bochs with the following command:
+    bochs -f bochsrc.txt -q
+
+   The flag -f tells Bochs to use the given configuration file and the
+   flag -q tells Bochs to skip the interactive start menu. You should now
+   see Bochs starting and displaying a console with some information from
+   GRUB on it.
+
+   After quitting Bochs, display the log produced by Bochs:
+    cat bochslog.txt
+
+   You should now see the contents of the registers of the CPU simulated
+   by Bochs somewhere in the output. If you find RAX=00000000CAFEBABE or
+   EAX=CAFEBABE (depending on if you are running Bochs with or without 64
+   bit support) in the output then your OS has successfully booted!
+
+2.4 Further Reading
+
+     * Gustavo Duarte has written an in-depth article about what actually
+       happens when an x86 computer boots up,
+       [150]http://duartes.org/gustavo/blog/post/how-computers-boot-up
+     * Gustavo continues to describe what the kernel does in the very
+       early stages at
+       [151]http://duartes.org/gustavo/blog/post/kernel-boot-process
+     * The OSDev wiki also contains a nice article about booting an x86
+       computer: [152]http://wiki.osdev.org/Boot_Sequence
+
+3 Getting to C
+
+   This chapter will show you how to use C instead of assembly code as the
+   programming language for the OS. Assembly is very good for interacting
+   with the CPU and enables maximum control over every aspect of the code.
+   However, at least for the authors, C is a much more convenient language
+   to use. Therefore, we would like to use C as much as possible and use
+   assembly code only where it makes sense.
+
+3.1 Setting Up a Stack
+
+   One prerequisite for using C is a stack, since all non-trivial C
+   programs use a stack. Setting up a stack is no harder than making the
+   esp register point to the end of an area of free memory (remember that
+   the stack grows towards lower addresses on the x86) that is correctly
+   aligned (alignment on 4 bytes is recommended from a performance
+   perspective).
+
+   We could point esp to a random area in memory since, so far, the only
+   thing in the memory is GRUB, BIOS, the OS kernel and some memory-mapped
+   I/O. This is not a good idea - we don't know how much memory is
+   available or if the area esp would point to is used by something else.
+   A better idea is to reserve a piece of uninitialized memory in the bss
+   section in the ELF file of the kernel. It is better to use the bss
+   section instead of the data section to reduce the size of the OS
+   executable. Since GRUB understands ELF, GRUB will allocate any memory
+   reserved in the bss section when loading the OS.
+
+   The NASM pseudo-instruction resb [24] can be used to declare
+   uninitialized data:
+    KERNEL_STACK_SIZE equ 4096                  ; size of stack in bytes
+
+    section .bss
+    align 4                                     ; align at 4 bytes
+    kernel_stack:                               ; label points to beginning of
+                                                ; memory
+        resb KERNEL_STACK_SIZE                  ; reserve stack for the kernel
+
+   There is no need to worry about the use of uninitialized memory for the
+   stack, since it is not possible to read a stack location that has not
+   been written (without manual pointer fiddling). A (correct) program can
+   not pop an element from the stack without having pushed an element onto
+   the stack first. Therefore, the memory locations of the stack will
+   always be written to before they are being read.
+
+   The stack pointer is then set up by pointing esp to the end of the
+   kernel_stack memory:
+    mov esp, kernel_stack + KERNEL_STACK_SIZE   ; point esp to the start of the
+                                                ; stack (end of memory area)
+
+3.2 Calling C Code From Assembly
+
+   The next step is to call a C function from assembly code. There are
+   many different conventions for how to call C code from assembly code
+   [25]. This book uses the cdecl calling convention, since that is the
+   one used by GCC. The cdecl calling convention states that arguments to
+   a function should be passed via the stack (on x86). The arguments of
+   the function should be pushed on the stack in a right-to-left order,
+   that is, you push the rightmost argument first. The return value of the
+   function is placed in the eax register. The following code shows an
+   example:
+    /* The C function */
+    int sum_of_three(int arg1, int arg2, int arg3)
+    {
+        return arg1 + arg2 + arg3;
+    }
+    ; The assembly code
+    extern sum_of_three     ; the function sum_of_three is defined elsewhere
+
+    push dword 3            ; arg3
+    push dword 2            ; arg2
+    push dword 1            ; arg1
+    call sum_of_three       ; call the function, the result will be in eax
+
+3.2.1 Packing Structs
+
+   In the rest of this book, you will often come across "configuration
+   bytes" that are a collection of bits in a very specific order. Below
+   follows an example with 32 bits:
+Bit:     | 31     24 | 23          8 | 7     0 |
+Content: | index     | address       | config  |
+
+   Instead of using an unsigned integer, unsigned int, for handling such
+   configurations, it is much more convenient to use "packed structures":
+    struct example {
+        unsigned char config;   /* bit 0 - 7   */
+        unsigned short address; /* bit 8 - 23  */
+        unsigned char index;    /* bit 24 - 31 */
+    };
+
+   When using the struct in the previous example there is no guarantee
+   that the size of the struct will be exactly 32 bits - the compiler can
+   add some padding between elements for various reasons, for example to
+   speed up element access or due to requirements set by the hardware
+   and/or compiler. When using a struct to represent configuration bytes,
+   it is very important that the compiler does not add any padding,
+   because the struct will eventually be treated as a 32 bit unsigned
+   integer by the hardware. The attribute packed can be used to force GCC
+   to not add any padding:
+    struct example {
+        unsigned char config;   /* bit 0 - 7   */
+        unsigned short address; /* bit 8 - 23  */
+        unsigned char index;    /* bit 24 - 31 */
+    } __attribute__((packed));
+
+   Note that __attribute__((packed)) is not part of the C standard - it
+   might not work with all C compilers.
+
+3.3 Compiling C Code
+
+   When compiling the C code for the OS, a lot of flags to GCC need to be
+   used. This is because the C code should not assume the presence of a
+   standard library, since there is no standard library available for our
+   OS. For more information about the flags, see the GCC manual.
+
+   The flags used for compiling the C code are:
+    -m32 -nostdlib -nostdinc -fno-builtin -fno-stack-protector -nostartfiles
+    -nodefaultlibs
+
+   As always when writing C programs we recommend turning on all warnings
+   and treating warnings as errors:
+    -Wall -Wextra -Werror
+
+   You can now create a function kmain in a file called kmain.c that you
+   call from loader.s. At this point, kmain probably won't need any
+   arguments (but in later chapters it will).
+
+3.4 Build Tools
+
+   Now is also probably a good time to set up some build tools to make it
+   easier to compile and test-run the OS. We recommend using make [13],
+   but there are plenty of other build systems available. A simple
+   Makefile for the OS could look like the following example:
+    OBJECTS = loader.o kmain.o
+    CC = gcc
+    CFLAGS = -m32 -nostdlib -nostdinc -fno-builtin -fno-stack-protector \
+             -nostartfiles -nodefaultlibs -Wall -Wextra -Werror -c
+    LDFLAGS = -T link.ld -melf_i386
+    AS = nasm
+    ASFLAGS = -f elf
+
+    all: kernel.elf
+
+    kernel.elf: $(OBJECTS)
+        ld $(LDFLAGS) $(OBJECTS) -o kernel.elf
+
+    os.iso: kernel.elf
+        cp kernel.elf iso/boot/kernel.elf
+        genisoimage -R                              \
+                    -b boot/grub/stage2_eltorito    \
+                    -no-emul-boot                   \
+                    -boot-load-size 4               \
+                    -A os                           \
+                    -input-charset utf8             \
+                    -quiet                          \
+                    -boot-info-table                \
+                    -o os.iso                       \
+                    iso
+
+    run: os.iso
+        bochs -f bochsrc.txt -q
+
+    %.o: %.c
+        $(CC) $(CFLAGS)  $< -o $@
+
+    %.o: %.s
+        $(AS) $(ASFLAGS) $< -o $@
+
+    clean:
+        rm -rf *.o kernel.elf os.iso
+
+   The contents of your working directory should now look like the
+   following figure:
+    .
+    |-- bochsrc.txt
+    |-- iso
+    |   |-- boot
+    |     |-- grub
+    |       |-- menu.lst
+    |       |-- stage2_eltorito
+    |-- kmain.c
+    |-- loader.s
+    |-- Makefile
+
+   You should now be able to start the OS with the simple command make
+   run, which will compile the kernel and boot it up in Bochs (as defined
+   in the Makefile above).
+
+3.5 Further Reading
+
+     * Kernighan & Ritchie's book, The C Programming Language, Second
+       Edition, [8] is great for learning about all the aspects of C.
+
+4 Output
+
+   This chapter will present how to display text on the console as well as
+   writing data to the serial port. Furthermore, we will create our first
+   driver, that is, code that acts as a layer between the kernel and the
+   hardware, providing a higher abstraction than communicating directly
+   with the hardware. The first part of this chapter is about creating a
+   driver for the framebuffer [26] to be able to display text on the
+   console. The second part shows how to create a driver for the serial
+   port. Bochs can store output from the serial port in a file,
+   effectively creating a logging mechanism for the operating system.
+
+4.1 Interacting with the Hardware
+
+   There are usually two different ways to interact with the hardware,
+   memory-mapped I/O and I/O ports.
+
+   If the hardware uses memory-mapped I/O then you can write to a specific
+   memory address and the hardware will be updated with the new data. One
+   example of this is the framebuffer, which will be discussed in more
+   detail later. For example, if you write the value 0x410F to address
+   0x000B8000, you will see the letter A in white color on a black
+   background (see the section on [153]the framebuffer for more details).
+
+   If the hardware uses I/O ports then the assembly code instructions out
+   and in must be used to communicate with the hardware. The instruction
+   out takes two parameters: the address of the I/O port and the data to
+   send. The instruction in takes a single parameter, the address of the
+   I/O port, and returns data from the hardware. One can think of I/O
+   ports as communicating with hardware the same way as you communicate
+   with a server using sockets. The cursor (the blinking rectangle) of the
+   framebuffer is one example of hardware controlled via I/O ports on a
+   PC.
+
+4.2 The Framebuffer
+
+   The framebuffer is a hardware device that is capable of displaying a
+   buffer of memory on the screen [26]. The framebuffer has 80 columns and
+   25 rows, and the row and column indices start at 0 (so rows are
+   labelled 0 - 24).
+
+4.2.1 Writing Text
+
+   Writing text to the console via the framebuffer is done with
+   memory-mapped I/O. The starting address of the memory-mapped I/O for
+   the framebuffer is 0x000B8000 [27]. The memory is divided into 16 bit
+   cells, where the 16 bits determine both the character, the foreground
+   color and the background color. The highest eight bits is the ASCII
+   [28] value of the character, bit 7 - 4 the background and bit 3 - 0 the
+   foreground, as can be seen in the following figure:
+Bit:     | 15 14 13 12 11 10 9 8 | 7 6 5 4 | 3 2 1 0 |
+Content: | ASCII                 | BG      | FG      |
+
+   The available colors are shown in the following table:
+   Color Value      Color Value       Color Value         Color Value
+   Black 0            Red 4       Dark grey 8         Light red 12
+    Blue 1        Magenta 5      Light blue 9     Light magenta 13
+   Green 2          Brown 6     Light green 10      Light brown 14
+    Cyan 3     Light grey 7      Light cyan 11            White 15
+
+   The first cell corresponds to row zero, column zero on the console.
+   Using an ASCII table, one can see that A corresponds to 65 or 0x41.
+   Therefore, to write the character A with a green foreground (2) and
+   dark grey background (8) at place (0,0), the following assembly code
+   instruction is used:
+    mov [0x000B8000], 0x4128
+
+   The second cell then corresponds to row zero, column one and, since
+   each cell is 16 bits (2 bytes) large, its address is therefore:
+    0x000B8000 + 2 = 0x000B8002
+
+   Writing to the framebuffer can also be done in C by treating the
+   address 0x000B8000 as a char pointer, char *fb = (char *) 0x000B8000.
+   Then, writing A at place (0,0) with green foreground and dark grey
+   background becomes:
+    fb[0] = 'A';
+    fb[1] = 0x28;
+
+   The following code shows how this can be wrapped into a function:
+    /** fb_write_cell:
+     *  Writes a character with the given foreground and background
+     *  colors to position i in the framebuffer. Each cell occupies two
+     *  bytes (fb[i] and fb[i + 1]), so i is a byte offset.
+     *
+     *  @param i  The location in the framebuffer
+     *  @param c  The character
+     *  @param fg The foreground color
+     *  @param bg The background color
+     */
+    void fb_write_cell(unsigned int i, char c, unsigned char fg,
+                       unsigned char bg)
+    {
+        fb[i] = c;
+        fb[i + 1] = ((fg & 0x0F) << 4) | (bg & 0x0F);
+    }
+
+   The function can then be used as follows:
+    #define FB_GREEN     2
+    #define FB_DARK_GREY 8
+
+    fb_write_cell(0, 'A', FB_GREEN, FB_DARK_GREY);
+
+4.2.2 Moving the Cursor
+
+   Moving the cursor of the framebuffer is done via two different I/O
+   ports. The cursor's position is determined with a 16 bits integer: 0
+   means row zero, column zero; 1 means row zero, column one; 80 means row
+   one, column zero and so on. Since the position is 16 bits large, and
+   the out assembly code instruction argument is 8 bits, the position must
+   be sent in two turns, first 8 bits then the next 8 bits. The
+   framebuffer has two I/O ports, one for accepting the data, and one for
+   describing the data being received. Port 0x3D4 [29] is the port that
+   describes the data and port 0x3D5 [29] is for the data itself.
+
+   To set the cursor at row one, column zero (position 80 = 0x0050), one
+   would use the following assembly code instructions:
+    ; 14 tells the framebuffer to expect the highest 8 bits of the position
+    out 0x3D4, 14
+    out 0x3D5, 0x00    ; sending the highest 8 bits of 0x0050
+    ; 15 tells the framebuffer to expect the lowest 8 bits of the position
+    out 0x3D4, 15
+    out 0x3D5, 0x50    ; sending the lowest 8 bits of 0x0050
+
+   The out assembly code instruction can't be executed directly in C.
+   Therefore it is a good idea to wrap out in a function in assembly code
+   which can be accessed from C via the cdecl calling standard [25]:
+    global outb             ; make the label outb visible outside this file
+
+    ; outb - send a byte to an I/O port
+    ; stack: [esp + 8] the data byte
+    ;        [esp + 4] the I/O port
+    ;        [esp    ] return address
+    outb:
+        mov al, [esp + 8]    ; move the data to be sent into the al register
+        ; move the address of the I/O port into the dx register
+        mov dx, [esp + 4]
+        out dx, al           ; send the data to the I/O port
+        ret                  ; return to the calling function
+
+   By storing this function in a file called io.s and also creating a
+   header io.h, the out assembly code instruction can be conveniently
+   accessed from C:
+    #ifndef INCLUDE_IO_H
+    #define INCLUDE_IO_H
+
+    /** outb:
+     *  Sends the given data to the given I/O port. Defined in io.s
+     *
+     *  @param port The I/O port to send the data to
+     *  @param data The data to send to the I/O port
+     */
+    void outb(unsigned short port, unsigned char data);
+
+    #endif /* INCLUDE_IO_H */
+
+   Moving the cursor can now be wrapped in a C function:
+    #include "io.h"
+
+    /* The I/O ports */
+    #define FB_COMMAND_PORT         0x3D4
+    #define FB_DATA_PORT            0x3D5
+
+    /* The I/O port commands */
+    #define FB_HIGH_BYTE_COMMAND    14
+    #define FB_LOW_BYTE_COMMAND     15
+
+    /** fb_move_cursor:
+     *  Moves the cursor of the framebuffer to the given position
+     *
+     *  @param pos The new position of the cursor
+     */
+    void fb_move_cursor(unsigned short pos)
+    {
+        outb(FB_COMMAND_PORT, FB_HIGH_BYTE_COMMAND);
+        outb(FB_DATA_PORT,    ((pos >> 8) & 0x00FF));
+        outb(FB_COMMAND_PORT, FB_LOW_BYTE_COMMAND);
+        outb(FB_DATA_PORT,    pos & 0x00FF);
+    }
+
+4.2.3 The Driver
+
+   The driver should provide an interface that the rest of the code in the
+   OS will use for interacting with the framebuffer. There is no right or
+   wrong in what functionality the interface should provide, but a
+   suggestion is to have a write function with the following declaration:
+    int write(char *buf, unsigned int len);
+
+   The write function writes the contents of the buffer buf of length len
+   to the screen. The write function should automatically advance the
+   cursor after a character has been written and scroll the screen if
+   necessary.
+
+4.3 The Serial Ports
+
+   The serial port [30] is an interface for communicating between hardware
+   devices and although it is available on almost all motherboards, it is
+   seldom exposed to the user in the form of a DE-9 connector nowadays.
+   The serial port is easy to use, and, more importantly, it can be used
+   as a logging utility in Bochs. If a computer has support for a serial
+   port, then it usually has support for multiple serial ports, but we
+   will only make use of one of the ports. This is because we will only
+   use the serial ports for logging. Furthermore, we will only use the
+   serial ports for output, not input. The serial ports are completely
+   controlled via I/O ports.
+
+4.3.1 Configuring the Serial Port
+
+   The first data that need to be sent to the serial port is configuration
+   data. In order for two hardware devices to be able to talk to each
+   other they must agree upon a couple of things. These things include:
+     * The speed used for sending data (bit or baud rate)
+     * If any error checking should be used for the data (parity bit, stop
+       bits)
+     * The number of bits that represent a unit of data (data bits)
+
+4.3.2 Configuring the Line
+
+   Configuring the line means to configure how data is being sent over the
+   line. The serial port has an I/O port, the line command port, that is
+   used for configuration.
+
+   First the speed for sending data will be set. The serial port has an
+   internal clock that runs at 115200 Hz. Setting the speed means sending
+   a divisor to the serial port, for example sending 2 results in a speed
+   of 115200 / 2 = 57600 Hz.
+
+   The divisor is a 16 bit number but we can only send 8 bits at a time.
+   We must therefore send an instruction telling the serial port to first
+   expect the highest 8 bits, then the lowest 8 bits. This is done by
+   sending 0x80 to the line command port. An example is shown below:
+    #include "io.h" /* io.h is implemented in the section "Moving the Cursor" */
+
+    /* The I/O ports */
+
+    /* All the I/O ports are calculated relative to the data port.
+     * This is because all serial ports (COM1, COM2, COM3, COM4)
+     * have their ports in the same order, but they start at
+     * different values.
+     */
+
+    #define SERIAL_COM1_BASE                0x3F8      /* COM1 base port */
+
+    #define SERIAL_DATA_PORT(base)          (base)
+    #define SERIAL_FIFO_COMMAND_PORT(base)  (base + 2)
+    #define SERIAL_LINE_COMMAND_PORT(base)  (base + 3)
+    #define SERIAL_MODEM_COMMAND_PORT(base) (base + 4)
+    #define SERIAL_LINE_STATUS_PORT(base)   (base + 5)
+
+    /* The I/O port commands */
+
+    /* SERIAL_LINE_ENABLE_DLAB:
+     * Tells the serial port to expect first the highest 8 bits
+     * on the data port,
+     * then the lowest 8 bits will follow
+     */
+    #define SERIAL_LINE_ENABLE_DLAB         0x80
+
+    /** serial_configure_baud_rate:
+     *  Sets the speed of the data being sent. The default speed of a serial
+     *  port is 115200 bits/s. The argument is a divisor of that number, hence
+     *  the resulting speed becomes (115200 / divisor) bits/s.
+     *
+     *  @param com      The COM port to configure
+     *  @param divisor  The divisor
+     */
+    void serial_configure_baud_rate(unsigned short com, unsigned short divisor)
+    {
+        outb(SERIAL_LINE_COMMAND_PORT(com),
+             SERIAL_LINE_ENABLE_DLAB);
+        outb(SERIAL_DATA_PORT(com),
+             (divisor >> 8) & 0x00FF);
+        outb(SERIAL_DATA_PORT(com),
+             divisor & 0x00FF);
+    }
+
+   The way that data should be sent must be configured. This is also done
+   via the line command port by sending a byte. The layout of the 8 bits
+   looks like the following:
+Bit:     | 7 | 6 | 5 4 3 | 2 | 1 0 |
+Content: | d | b | prty  | s | dl  |
+
+   A description for each name can be found in the table below (and in
+   [31]):
+   Name Description
+   d Enables (d = 1) or disables (d = 0) DLAB
+   b If break control is enabled (b = 1) or disabled (b = 0)
+   prty The number of parity bits to use
+   s The number of stop bits to use (s = 0 equals 1, s = 1 equals 1.5 or
+   2)
+   dl Describes the length of the data
+
+   We will use the mostly standard value 0x03 [31], meaning a length of 8
+   bits, no parity bit, one stop bit and break control disabled. This is
+   sent to the line command port, as seen in the following example:
+    /** serial_configure_line:
+     *  Configures the line of the given serial port. The port is set to have a
+     *  data length of 8 bits, no parity bits, one stop bit and break control
+     *  disabled.
+     *
+     *  @param com  The serial port to configure
+     */
+    void serial_configure_line(unsigned short com)
+    {
+        /* Bit:     | 7 | 6 | 5 4 3 | 2 | 1 0 |
+         * Content: | d | b | prty  | s | dl  |
+         * Value:   | 0 | 0 | 0 0 0 | 0 | 1 1 | = 0x03
+         */
+        outb(SERIAL_LINE_COMMAND_PORT(com), 0x03);
+    }
+
+   The article on OSDev [31] has a more in-depth explanation of the
+   values.
+
+4.3.3 Configuring the Buffers
+
+   When data is transmitted via the serial port it is placed in buffers,
+   both when receiving and sending data. This way, if you send data to the
+   serial port faster than it can send it over the wire, it will be
+   buffered. However, if you send too much data too fast the buffer will
+   be full and data will be lost. In other words, the buffers are FIFO
+   queues. The FIFO queue configuration byte looks like the following
+   figure:
+Bit:     | 7 6 | 5  | 4 | 3   | 2   | 1   | 0 |
+Content: | lvl | bs | r | dma | clt | clr | e |
+
+   A description for each name can be found in the table below:
+   Name Description
+    lvl How many bytes should be stored in the FIFO buffers
+     bs If the buffers should be 16 or 64 bytes large
+      r Reserved for future use
+    dma How the serial port data should be accessed
+    clt Clear the transmission FIFO buffer
+    clr Clear the receiver FIFO buffer
+      e If the FIFO buffer should be enabled or not
+
+   We use the value 0xC7 = 11000111 that:
+     * Enables FIFO
+     * Clear both receiver and transmission FIFO queues
+     * Use 14 bytes as size of queue
+
+   The WikiBook on serial programming [32] explains the values in more
+   depth.
+
+4.3.4 Configuring the Modem
+
+   The modem control register is used for very simple hardware flow
+   control via the Ready To Transmit (RTS) and Data Terminal Ready (DTR)
+   pins. When configuring the serial port we want RTS and DTR to be 1,
+   which means that we are ready to send data.
+
+   The modem configuration byte is shown in the following figure:
+Bit:     | 7 | 6 | 5  | 4  | 3   | 2   | 1   | 0   |
+Content: | r | r | af | lb | ao2 | ao1 | rts | dtr |
+
+   A description for each name can be found in the table below:
+   Name Description
+      r Reserved
+     af Autoflow control enabled
+     lb Loopback mode (used for debugging serial ports)
+    ao2 Auxiliary output 2, used for receiving interrupts
+    ao1 Auxiliary output 1
+    rts Ready To Transmit
+    dtr Data Terminal Ready
+
+   We don't need to enable interrupts, because we won't handle any
+   received data. Therefore we use the configuration value 0x03 = 00000011
+   (RTS = 1 and DTR = 1).
+
+4.3.5 Writing Data to the Serial Port
+
+   Writing data to the serial port is done via the data I/O port. However,
+   before writing, the transmit FIFO queue has to be empty (all previous
+   writes must have finished). The transmit FIFO queue is empty if bit 5
+   of the line status I/O port is equal to one.
+
+   Reading the contents of an I/O port is done via the in assembly code
+   instruction. There is no way to use the in assembly code instruction
+   from C, therefore it has to be wrapped (the same way as the out
+   assembly code instruction):
+    global inb
+
+    ; inb - returns a byte from the given I/O port
+    ; stack: [esp + 4] The address of the I/O port
+    ;        [esp    ] The return address
+    inb:
+        ; move the address of the I/O port to the dx register
+        mov dx, [esp + 4]
+        ; read a byte from the I/O port and store it in the al register
+        in  al, dx
+        ret                     ; return the read byte
+    /* in file io.h */
+
+    /** inb:
+     *  Read a byte from an I/O port.
+     *
+     *  @param  port The address of the I/O port
+     *  @return      The read byte
+     */
+    unsigned char inb(unsigned short port);
+
+   Checking if the transmit FIFO is empty can then be done from C:
+    #include "io.h"
+
+    /** serial_is_transmit_fifo_empty:
+     *  Checks whether the transmit FIFO queue is empty or not for the given COM
+     *  port.
+     *
+     *  @param  com The COM port
+     *  @return 0 if the transmit FIFO queue is not empty
+     *          non-zero (0x20) if the transmit FIFO queue is empty
+     */
+    int serial_is_transmit_fifo_empty(unsigned int com)
+    {
+        /* 0x20 = 0010 0000 */
+        return inb(SERIAL_LINE_STATUS_PORT(com)) & 0x20;
+    }
+
+   Writing to a serial port means spinning as long as the transmit FIFO
+   queue isn't empty, and then writing the data to the data I/O port.
+
+4.3.6 Configuring Bochs
+
+   To save the output from the first serial port the Bochs
+   configuration file bochsrc.txt must be updated. The com1 configuration
+   instructs Bochs how to handle the first serial port:
+    com1: enabled=1, mode=file, dev=com1.out
+
+   The output from serial port one will now be stored in the file
+   com1.out.
+
+4.3.7 The Driver
+
+   We recommend that you implement a write function for the serial port
+   similar to the write function in the driver for the framebuffer. To
+   avoid name clashes with the write function for the framebuffer it is a
+   good idea to name the functions fb_write and serial_write to
+   distinguish them.
+
+   We further recommend that you try to write a printf-like function, see
+   section 7.3 in [8]. The printf function could take an additional
+   argument to decide to which device to write the output (framebuffer or
+   serial).
+
+   A final recommendation is that you create some way of distinguishing
+   the severity of the log messages, for example by prepending the
+   messages with DEBUG, INFO or ERROR.
+
+4.4 Further Reading
+
+     * The book "Serial programming" (available on WikiBooks) has a great
+       section on programming the serial port,
+       [154]http://en.wikibooks.org/wiki/Serial_Programming/8250_UART_Prog
+       ramming#UART_Registers
+     * The OSDev wiki has a page with a lot of information about the
+       serial ports, [155]http://wiki.osdev.org/Serial_ports
+
+5 Segmentation
+
+   Segmentation in x86 means accessing the memory through segments.
+   Segments are portions of the address space, possibly overlapping,
+   specified by a base address and a limit. To address a byte in segmented
+   memory you use a 48-bit logical address: 16 bits that specify the
+   segment and 32 bits that specify what offset within that segment you
+   want. The offset is added to the base address of the segment, and the
+   resulting linear address is checked against the segment's limit - see
+   the figure below. If everything works out fine (including access-rights
+   checks ignored for now) the result is a linear address. When paging is
+   disabled, then the linear address space is mapped 1:1 onto the physical
+   address space, and the physical memory can be accessed. (See the
+   chapter [156]"Paging" for how to enable paging.)
+   Translation of logical addresses to linear addresses.
+
+   Translation of logical addresses to linear addresses.
+
+   To enable segmentation you need to set up a table that describes each
+   segment - a segment descriptor table. In x86, there are two types of
+   descriptor tables: the Global Descriptor Table (GDT) and Local
+   Descriptor Tables (LDT). An LDT is set up and managed by user-space
+   processes, and all processes have their own LDT. LDTs can be used if a
+   more complex segmentation model is desired - we won't use it. The GDT
+   is shared by everyone - it's global.
+
+   As we discuss in the sections on virtual memory and paging,
+   segmentation is rarely used more than in a minimal setup, similar to
+   what we do below.
+
+5.1 Accessing Memory
+
+   Most of the time when accessing memory there is no need to explicitly
+   specify the segment to use. The processor has six 16-bit segment
+   registers: cs, ss, ds, es, gs and fs. The register cs is the code
+   segment register and specifies the segment to use when fetching
+   instructions. The register ss is used whenever accessing the stack
+   (through the stack pointer esp), and ds is used for other data
+   accesses. The OS is free to use the registers es, gs and fs however it
+   wants.
+
+   Below is an example showing implicit use of the segment registers:
+    func:
+        mov eax, [esp+4]
+        mov ebx, [eax]
+        add ebx, 8
+        mov [eax], ebx
+        ret
+
+   The above example can be compared with the following one that makes
+   explicit use of the segment registers:
+    func:
+        mov eax, [ss:esp+4]
+        mov ebx, [ds:eax]
+        add ebx, 8
+        mov [ds:eax], ebx
+        ret
+
+   You don't need to use ss for storing the stack segment selector, or ds
+   for the data segment selector. You could store the stack segment
+   selector in ds and vice versa. However, in order to use the implicit
+   style shown above, you must store the segment selectors in their
+   intended registers.
+
+   Segment descriptors and their fields are described in figure 3-8 in the
+   Intel manual [33].
+
+5.2 The Global Descriptor Table (GDT)
+
+   A GDT/LDT is an array of 8-byte segment descriptors. The first
+   descriptor in the GDT is always a null descriptor and can never be used
+   to access memory. At least two segment descriptors (plus the null
+   descriptor) are needed for the GDT, because the descriptor contains
+   more information than just the base and limit fields. The two most
+   relevant fields for us are the Type field and the Descriptor Privilege
+   Level (DPL) field.
+
+   Table 3-1 in chapter 3 of the Intel manual [33] specifies the values
+   for the Type field. The table shows that the Type field can't be both
+   writable and executable at the same time. Therefore, two segments are
+   needed: one segment for executing code to put in cs (Type is
+   Execute-only or Execute-Read) and one segment for reading and writing
+   data (Type is Read/Write) to put in the other segment registers.
+
+   The DPL specifies the privilege levels required to use the segment. x86
+   allows for four privilege levels (PL), 0 to 3, where PL0 is the most
+   privileged. In most operating systems (e.g. Linux and Windows), only PL0
+   and PL3 are used. However, some operating systems, such as MINIX, make
+   use of all levels. The kernel should be able to do anything, therefore
+   it uses segments with DPL set to 0 (also called kernel mode). The
+   current privilege level (CPL) is determined by the segment selector in
+   cs.
+
+   The segments needed are described in the table below.
+
+   CAPTION: The segment descriptors needed.
+
+   Index Offset Name                Address range           Type DPL
+       0   0x00 null descriptor
+       1   0x08 kernel code segment 0x00000000 - 0xFFFFFFFF RX   PL0
+       2   0x10 kernel data segment 0x00000000 - 0xFFFFFFFF RW   PL0
+
+   Note that the segments overlap - they both encompass the entire linear
+   address space. In our minimal setup we'll only use segmentation to get
+   privilege levels. See the Intel manual [33], chapter 3, for details on
+   the other descriptor fields.
+
+5.3 Loading the GDT
+
+   Loading the GDT into the processor is done with the lgdt assembly code
+   instruction, which takes the address of a struct that specifies the
+   start and size of the GDT. It is easiest to encode this information
+   using a [157]"packed struct" as shown in the following example:
+    struct gdt {
+        unsigned short size;    /* bit 0 - 15  */
+        unsigned int address;   /* bit 16 - 47 */
+    } __attribute__((packed));
+
+   If the content of the eax register is the address to such a struct,
+   then the GDT can be loaded with the assembly code shown below:
+    lgdt [eax]
+
+   It might be easier if you make this instruction available from C, the
+   same way as was done with the assembly code instructions in and out.
+
+   After the GDT has been loaded the segment registers need to be loaded
+   with their corresponding segment selectors. The content of a segment
+   selector is described in the figure and table below:
+Bit:     | 15                                3 | 2  | 1 0 |
+Content: | offset (index)                      | ti | rpl |
+
+   CAPTION: The layout of segment selectors.
+
+   Name Description
+   rpl Requested Privilege Level - we want to execute in PL0 for now.
+   ti Table Indicator. 0 means that this specifies a GDT segment, 1 means
+   an LDT Segment.
+   offset (index) Offset within descriptor table.
+
+   The offset of the segment selector is added to the start of the GDT to
+   get the address of the segment descriptor: 0x08 for the first
+   descriptor and 0x10 for the second, since each descriptor is 8 bytes.
+   The Requested Privilege Level (RPL) should be 0 since the kernel of the
+   OS should execute in privilege level 0.
+
+   Loading the segment selector registers is easy for the data registers -
+   just copy the correct offsets to the registers:
+    mov ax, 0x10    ; a segment register can't be loaded from an immediate
+    mov ds, ax
+    mov ss, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+
+   To load cs we have to do a "far jump":
+    ; code here uses the previous cs
+    jmp 0x08:flush_cs   ; specify cs when jumping to flush_cs
+
+    flush_cs:
+        ; now we've changed cs to 0x08
+
+   A far jump is a jump where we explicitly specify the full 48-bit
+   logical address: the segment selector to use and the absolute address
+   to jump to. It will first set cs to 0x08 and then jump to flush_cs
+   using its absolute address.
+
+5.4 Further Reading
+
+     * Chapter 3 of the Intel manual [33] is filled with low-level and
+       technical details about segmentation.
+     * The OSDev wiki has a page about segmentation:
+       [158]http://wiki.osdev.org/Segmentation
+     * The Wikipedia page on x86 segmentation might be worth looking into:
+       [159]http://en.wikipedia.org/wiki/X86_memory_segmentation
+
+6 Interrupts and Input
+
+   Now that the OS can produce output it would be nice if it also could
+   get some input. (The operating system must be able to handle interrupts
+   in order to read information from the keyboard). An interrupt occurs
+   when a hardware device, such as the keyboard, the serial port or the
+   timer, signals the CPU that the state of the device has changed. The
+   CPU itself can also send interrupts due to program errors, for example
+   when a program references memory it doesn't have access to, or when a
+   program divides a number by zero. Finally, there are also software
+   interrupts, which are interrupts that are caused by the int assembly
+   code instruction, and they are often used for system calls.
+
+6.1 Interrupts Handlers
+
+   Interrupts are handled via the Interrupt Descriptor Table (IDT). The
+   IDT describes a handler for each interrupt. The interrupts are numbered
+   (0 - 255) and the handler for interrupt i is defined at the ith
+   position in the table. There are three different kinds of handlers for
+   interrupts:
+     * Task handler
+     * Interrupt handler
+     * Trap handler
+
+   The task handlers use functionality specific to the Intel version of
+   x86, so they won't be covered here (see the Intel manual [33], chapter
+   6, for more info). The only difference between an interrupt handler and
+   a trap handler is that the interrupt handler disables interrupts, which
+   means you cannot get an interrupt while at the same time handling an
+   interrupt. In this book, we will use trap handlers and disable
+   interrupts manually when we need to.
+
+6.2 Creating an Entry in the IDT
+
+   An entry in the IDT for an interrupt handler consists of 64 bits. The
+   highest 32 bits are shown in the figure below:
+Bit:     | 31              16 | 15 | 14 13 | 12 |
+Content: | offset high        | P  | DPL   | 0  |
+Bit:     | 11 | 10 9 8 | 7 6 5 | 4 3 2 1 0 |
+Content: | D  | 1  1 0 | 0 0 0 | reserved  |
+
+   The lowest 32 bits are presented in the following figure:
+Bit:     | 31              16 | 15              0 |
+Content: | segment selector   | offset low        |
+
+   A description for each name can be found in the table below:
+   Name Description
+   offset high The 16 highest bits of the 32 bit address in the segment.
+   offset low The 16 lowest bits of the 32 bits address in the segment.
+   p If the handler is present in memory or not (1 = present, 0 = not
+   present).
+   DPL Descriptor Privilege Level, the privilege level the handler can be
+   called from (0, 1, 2, 3).
+   D Size of gate, (1 = 32 bits, 0 = 16 bits).
+   segment selector The offset in the GDT.
+   r Reserved.
+
+   The offset is a pointer to code (preferably an assembly code label).
+   For example, to create an entry for a handler whose code starts at
+   0xDEADBEEF and that runs in privilege level 0 (therefore using the same
+   code segment selector as the kernel) the following two 32-bit values
+   would be used:
+    0xDEAD8E00
+    0x0008BEEF
+
+   If the IDT is represented as an unsigned integer idt[512] then to
+   register the above example as a handler for interrupt 0
+   (divide-by-zero), the following code would be used:
+    idt[0] = 0x0008BEEF    /* lowest 32 bits of the entry  */
+    idt[1] = 0xDEAD8E00    /* highest 32 bits of the entry */
+
+   As written in the chapter [160]"Getting to C", we recommend that you
+   instead of using bytes (or unsigned integers) use packed structures to
+   make the code more readable.
+
+6.3 Handling an Interrupt
+
+   When an interrupt occurs the CPU will push some information about the
+   interrupt onto the stack, then look up the appropriate interrupt handler
+   in the IDT and jump to it. The stack at the time of the interrupt will
+   look like the following:
+    [esp + 12] eflags
+    [esp + 8]  cs
+    [esp + 4]  eip
+    [esp]      error code?
+
+   The reason for the question mark behind error code is that not all
+   interrupts create an error code. The specific CPU interrupts that put
+   an error code on the stack are 8, 10, 11, 12, 13, 14 and 17. The error
+   code can be used by the interrupt handler to get more information on
+   what has happened. Also, note that the interrupt number is not pushed
+   onto the stack. We can only determine what interrupt has occurred by
+   knowing what code is executing - if the handler registered for
+   interrupt 17 is executing, then interrupt 17 has occurred.
+
+   Once the interrupt handler is done, it uses the iret instruction to
+   return. The instruction iret expects the stack to be the same as at the
+   time of the interrupt (see the figure above). Therefore, any values
+   pushed onto the stack by the interrupt handler must be popped. Before
+   returning, iret restores eflags by popping the value from the stack and
+   then finally jumps to cs:eip as specified by the values on the stack.
+
+   The interrupt handler has to be written in assembly code, since all
+   registers that the interrupt handlers use must be preserved by pushing
+   them onto the stack. This is because the code that was interrupted
+   doesn't know about the interrupt and will therefore expect that its
+   registers stay the same. Writing all the logic of the interrupt handler
+   in assembly code will be tiresome. Creating a handler in assembly code
+   that saves the registers, calls a C function, restores the registers
+   and finally executes iret is a good idea!
+
+   The C handler should get the state of the registers, the state of the
+   stack and the number of the interrupt as arguments. The following
+   definitions can for example be used:
+    struct cpu_state {
+        unsigned int eax;
+        unsigned int ebx;
+        unsigned int ecx;
+        .
+        .
+        .
+        unsigned int esp;
+    } __attribute__((packed));
+
+    struct stack_state {
+        unsigned int error_code;
+        unsigned int eip;
+        unsigned int cs;
+        unsigned int eflags;
+    } __attribute__((packed));
+
+    void interrupt_handler(struct cpu_state cpu, struct stack_state stack,
+                           unsigned int interrupt);
+
+6.4 Creating a Generic Interrupt Handler
+
+   Since the CPU does not push the interrupt number on the stack it is a
+   little tricky to write a generic interrupt handler. This section will
+   use macros to show how it can be done. Writing one version for each
+   interrupt is tedious - it is better to use the macro functionality of
+   NASM [34]. And since not all interrupts produce an error code the value
+   0 will be added as the "error code" for interrupts without an error
+   code. The following code shows an example of how this can be done:
+    %macro no_error_code_interrupt_handler 1
+    global interrupt_handler_%1
+    interrupt_handler_%1:
+        push    dword 0                     ; push 0 as error code
+        push    dword %1                    ; push the interrupt number
+        jmp     common_interrupt_handler    ; jump to the common handler
+    %endmacro
+
+    %macro error_code_interrupt_handler 1
+    global interrupt_handler_%1
+    interrupt_handler_%1:
+        push    dword %1                    ; push the interrupt number
+        jmp     common_interrupt_handler    ; jump to the common handler
+    %endmacro
+
+    common_interrupt_handler:               ; the common parts of the generic
+                                            ; interrupt handler
+        ; save the registers
+        push    eax
+        push    ebx
+        .
+        .
+        .
+        push    ebp
+
+        ; call the C function
+        call    interrupt_handler
+
+        ; restore the registers
+        pop     ebp
+        .
+        .
+        .
+        pop     ebx
+        pop     eax
+
+        ; restore the esp
+        add     esp, 8
+
+        ; return to the code that got interrupted
+        iret
+
+    no_error_code_interrupt_handler 0       ; create handler for interrupt 0
+    no_error_code_interrupt_handler 1       ; create handler for interrupt 1
+    .
+    .
+    .
+    error_code_interrupt_handler    8       ; create handler for interrupt 8
+    .
+    .
+    .
+
+   The common_interrupt_handler does the following:
+     * Push the registers on the stack.
+     * Call the C function interrupt_handler.
+     * Pop the registers from the stack.
+     * Add 8 to esp (because of the error code and the interrupt number
+       pushed earlier).
+     * Execute iret to return to the interrupted code.
+
+   Since the macros declare global labels the addresses of the interrupt
+   handlers can be accessed from C or assembly code when creating the IDT.
+
+6.5 Loading the IDT
+
+   The IDT is loaded with the lidt instruction, which takes the address of
+   a 6-byte structure (16-bit table size minus one, then the 32-bit table
+   address). It is easiest to wrap this instruction and use it from C:
+    global  load_idt
+
+    ; load_idt - Loads the interrupt descriptor table (IDT).
+    ; stack: [esp + 4] the address of the 6-byte IDT descriptor structure
+    ;        [esp    ] the return address
+    load_idt:
+        mov     eax, [esp+4]    ; load the address of the structure into eax
+        lidt    [eax]           ; load the IDT
+        ret                     ; return to the calling function
+
+6.6 Programmable Interrupt Controller (PIC)
+
+   To start using hardware interrupts you must first configure the
+   Programmable Interrupt Controller (PIC). The PIC makes it possible to
+   map signals from the hardware to interrupts. The reasons for
+   configuring the PIC are:
+     * Remap the interrupts. The PIC uses interrupts 0 - 15 for hardware
+       interrupts by default, which conflicts with the CPU interrupts.
+       Therefore the PIC interrupts must be remapped to another interval.
+     * Select which interrupts to receive. You probably don't want to
+       receive interrupts from all devices since you don't have code that
+       handles these interrupts anyway.
+     * Set up the correct mode for the PIC.
+
+   In the beginning there was only one PIC (PIC 1) and eight interrupts.
+   As more hardware was added, 8 interrupts were too few. The solution
+   chosen was to chain on another PIC (PIC 2) on the first PIC (see
+   interrupt 2 on PIC 1).
+
+   The hardware interrupts are shown in the table below:
+   PIC 1 Hardware    PIC 2 Hardware
+       0 Timer           8 Real Time Clock
+       1 Keyboard        9 General I/O
+       2 PIC 2          10 General I/O
+       3 COM 2          11 General I/O
+       4 COM 1          12 General I/O
+       5 LPT 2          13 Coprocessor
+       6 Floppy disk    14 IDE Bus
+       7 LPT 1          15 IDE Bus
+
+   A great tutorial for configuring the PIC can be found at the SigOPS
+   website [35]. We won't repeat that information here.
+
+   Every interrupt from the PIC has to be acknowledged - that is, sending
+   a message to the PIC confirming that the interrupt has been handled. If
+   this isn't done the PIC won't generate any more interrupts.
+
+   Acknowledging a PIC interrupt is done by sending the byte 0x20 to the
+   PIC that raised the interrupt. Implementing a pic_acknowledge function
+   can thus be done as follows:
+    #include "io.h"
+
+    #define PIC1_PORT_A 0x20
+    #define PIC2_PORT_A 0xA0
+
+    /* The PIC interrupts have been remapped */
+    #define PIC1_START_INTERRUPT 0x20
+    #define PIC2_START_INTERRUPT 0x28
+    #define PIC2_END_INTERRUPT   (PIC2_START_INTERRUPT + 7)
+
+    #define PIC_ACK     0x20
+
+    /** pic_acknowledge:
+     *  Acknowledges an interrupt from either PIC 1 or PIC 2.
+     *
+     *  @param interrupt The number of the interrupt
+     */
+    void pic_acknowledge(unsigned int interrupt)
+    {
+        if (interrupt < PIC1_START_INTERRUPT ||
+            interrupt > PIC2_END_INTERRUPT) {
+          return;
+        }
+
+        if (interrupt >= PIC2_START_INTERRUPT) {
+          outb(PIC2_PORT_A, PIC_ACK);
+        }
+        /* PIC 2 is chained through PIC 1, so PIC 1 must always be acked */
+        outb(PIC1_PORT_A, PIC_ACK);
+    }
+
+6.7 Reading Input from the Keyboard
+
+   The keyboard does not generate ASCII characters, it generates scan
+   codes. A scan code represents a button - both presses and releases. The
+   scan code representing the just pressed button can be read from the
+   keyboard's data I/O port which has address 0x60. How this can be done
+   is shown in the following example:
+    #include "io.h"
+
+    #define KBD_DATA_PORT   0x60
+
+    /** read_scan_code:
+     *  Reads a scan code from the keyboard
+     *
+     *  @return The scan code (NOT an ASCII character!)
+     */
+    unsigned char read_scan_code(void)
+    {
+        return inb(KBD_DATA_PORT);
+    }
+
+   The next step is to write a function that translates a scan code to the
+   corresponding ASCII character. If you want to map the scan codes to
+   ASCII characters as is done on an American keyboard then Andries
+   Brouwer has a great tutorial [36].
+
+   Remember, since the keyboard interrupt is raised by the PIC, you must
+   call pic_acknowledge at the end of the keyboard interrupt handler.
+   Also, the keyboard will not send you any more interrupts until you read
+   the scan code from the keyboard.
+
+6.8 Further Reading
+
+     * The OSDev wiki has a great page on interrupts,
+       [161]http://wiki.osdev.org/Interrupts
+     * Chapter 6 of Intel Manual 3a [33] describes everything there is to
+       know about interrupts.
+
+7 The Road to User Mode
+
+   Now that the kernel boots, prints to screen and reads from keyboard -
+   what do we do? Usually, a kernel is not supposed to do the application
+   logic itself, but leave that for applications. The kernel creates the
+   proper abstractions (for memory, files, devices) to make application
+   development easier, performs tasks on behalf of applications (system
+   calls) and [162]schedules processes.
+
+   User mode, in contrast with kernel mode, is the environment in which
+   the user's programs execute. This environment is less privileged than
+   the kernel, and will prevent (badly written) user programs from messing
+   with other programs or the kernel. Badly written kernels are free to
+   mess up what they want.
+
+   There's quite a way to go until the OS created in this book can execute
+   programs in user mode, but this chapter will show how to easily execute
+   a small program in kernel mode.
+
+7.1 Loading an External Program
+
+   Where do we get the external program from? Somehow we need to load the
+   code we want to execute into memory. More feature-complete operating
+   systems usually have drivers and file systems that enable them to load
+   the software from a CD-ROM drive, a hard disk or other persistent
+   media.
+
+   Instead of creating all these drivers and file systems we will use a
+   feature in GRUB called modules to load the program.
+
+7.1.1 GRUB Modules
+
+   GRUB can load arbitrary files into memory from the ISO image, and these
+   files are usually referred to as modules. To make GRUB load a module,
+   edit the file iso/boot/grub/menu.lst and add the following line at the
+   end of the file:
+    module /modules/program
+
+   Now create the folder iso/modules:
+    mkdir -p iso/modules
+
+   The application program will be created later in this chapter.
+
+   The code that calls kmain must be updated to pass information to kmain
+   about where it can find the modules. We also want to tell GRUB that it
+   should align all the modules on page boundaries when loading them (see
+   the chapter [163]"Paging" for details about page alignment).
+
+   To instruct GRUB how to load our modules, the "multiboot header" - the
+   first bytes of the kernel - must be updated as follows:
+    ; in file `loader.s`
+
+
+    MAGIC_NUMBER    equ 0x1BADB002      ; define the magic number constant
+    ALIGN_MODULES   equ 0x00000001      ; tell GRUB to align modules
+
+    ; calculate the checksum (all options + checksum should equal 0)
+    CHECKSUM        equ -(MAGIC_NUMBER + ALIGN_MODULES)
+
+    section .text                       ; start of the text (code) section
+    align 4                             ; the code must be 4 byte aligned
+        dd MAGIC_NUMBER                 ; write the magic number
+        dd ALIGN_MODULES                ; write the align modules instruction
+        dd CHECKSUM                     ; write the checksum
+
+   GRUB will also store a pointer to a struct in the register ebx that,
+   among other things, describes at which addresses the modules are
+   loaded. Therefore, you probably want to push ebx on the stack before
+   calling kmain to make it an argument for kmain.
+
+7.2 Executing a Program
+
+7.2.1 A Very Simple Program
+
+   A program written at this stage can only perform a few actions.
+   Therefore, a very short program that writes a value to a register
+   suffices as a test program. Halting Bochs after a while and then checking
+   that the register contains the correct number by looking in the Bochs
+   log will verify that the program has run. This is an example of such a
+   short program:
+    ; set eax to some distinguishable number, to read from the log afterwards
+    mov eax, 0xDEADBEEF
+
+    ; enter infinite loop, nothing more to do
+    ; $ means "beginning of line", ie. the same instruction
+    jmp $
+
+7.2.2 Compiling
+
+   Since our kernel cannot parse advanced executable formats we need to
+   compile the code into a flat binary. NASM can do this with -f bin:
+    nasm -f bin program.s -o program
+
+   This is all we need. You must now move the file program to the folder
+   iso/modules.
+
+7.2.3 Finding the Program in Memory
+
+   Before jumping to the program we must find where it resides in memory.
+   Assuming that the contents of ebx is passed as an argument to kmain, we
+   can do this entirely from C.
+
+   The pointer in ebx points to a multiboot structure [19]. The
+   multiboot.h file, which describes the structure, can be
+   downloaded from:
+   [164]http://www.gnu.org/software/grub/manual/multiboot/html_node/multiboot.h.html
+
+   The pointer passed to kmain in the ebx register can be cast to a
+   multiboot_info_t pointer. The field mods_addr holds the address of the
+   first module structure, whose mod_start field is the module's address:
+    int kmain(/* additional arguments */ unsigned int ebx)
+    {
+        multiboot_info_t *mbinfo = (multiboot_info_t *) ebx;
+        unsigned int address_of_module = ((multiboot_module_t *) mbinfo->mods_addr)->mod_start;
+    }
+
+   However, before just blindly following the pointer, you should check
+   that the module got loaded correctly by GRUB. This can be done by
+   checking the flags field of the multiboot_info_t structure. You should
+   also check the field mods_count to make sure it is exactly 1. For more
+   details about the multiboot structure, see the multiboot documentation
+   [19].
+
+7.2.4 Jumping to the Code
+
+   The only thing left to do is to jump to the code loaded by GRUB. Since
+   it is easier to parse the multiboot structure in C than assembly code,
+   calling the code from C is more convenient (it can of course be done
+   with jmp or call in assembly code as well). The C code could look like
+   this:
+    typedef void (*call_module_t)(void);
+    /* ... */
+    call_module_t start_program = (call_module_t) address_of_module;
+    start_program();
+    /* we'll never get here, unless the module code returns */
+
+   If we start the kernel, wait until it has run and entered the infinite
+   loop in the program, and then halt Bochs, we should see 0xDEADBEEF in
+   the register eax via the Bochs log. We have successfully started a
+   program in our OS!
+
+7.3 The Beginning of User Mode
+
+   The program we've written now runs at the same privilege level as the
+   kernel - we've just entered it in a somewhat peculiar way. To enable
+   applications to execute at a different privilege level we'll need to,
+   beside [165]segmentation, do [166]paging and [167]page frame
+   allocation.
+
+   It's quite a lot of work and technical details to go through, but in a
+   few chapters you'll have working user mode programs.
+
+8 A Short Introduction to Virtual Memory
+
+   Virtual memory is an abstraction of physical memory. The purpose of
+   virtual memory is generally to simplify application development and to
+   let processes address more memory than what is actually physically
+   present in the machine. We also don't want applications messing with
+   the kernel or other applications' memory due to security.
+
+   In the x86 architecture, virtual memory can be accomplished in two
+   ways: segmentation and paging. Paging is by far the most common and
+   versatile technique, and we'll implement it in the next chapter. Some use
+   of segmentation is still necessary to allow for code to execute under
+   different privilege levels.
+
+   Managing memory is a big part of what an operating system does.
+   [168]Paging and [169]page frame allocation deal with that.
+
+   Segmentation and paging are described in [33], chapters 3 and 4.
+
+8.1 Virtual Memory Through Segmentation?
+
+   You could skip paging entirely and just use segmentation for virtual
+   memory. Each user mode process would get its own segment, with base
+   address and limit properly set up. This way no process can see the
+   memory of another process. A problem with this is that the physical
+   memory for a process needs to be contiguous (or at least it is very
+   convenient if it is). Either we need to know in advance how much memory
+   the program will require (unlikely), or we can move the memory segments
+   to places where they can grow when the limit is reached (expensive,
+   causes fragmentation - can result in "out of memory" even though enough
+   memory is available). Paging solves both these problems.
+
+   It is interesting to note that in x86_64 (the 64-bit version of the x86
+   architecture), segmentation is almost completely removed.
+
+8.2 Further Reading
+
+     * LWN.net has an article on virtual memory:
+       [170]http://lwn.net/Articles/253361/
+     * Gustavo Duarte has also written an article about virtual
+       memory:
+       [171]http://duartes.org/gustavo/blog/post/memory-translation-and-segmentation
+
+9 Paging
+
+   Segmentation translates a logical address into a linear address. Paging
+   translates these linear addresses onto the physical address space, and
+   determines access rights and how the memory should be cached.
+
+9.1 Why Paging?
+
+   Paging is the most common technique used in x86 to enable virtual
+   memory. Virtual memory through paging means that each process will get
+   the impression that the available memory range is 0x00000000 -
+   0xFFFFFFFF even though the actual size of the memory might be much
+   less. It also means that when a process addresses a byte of memory it
+   will use a virtual (linear) address instead of a physical one. The code
+   in the user process won't notice any difference (except for execution
+   delays). The linear address gets translated to a physical address by
+   the MMU and the page table. If the virtual address isn't mapped to a
+   physical address, the CPU will raise a page fault interrupt.
+
+   Paging is optional, and some operating systems do not make use of it.
+   But if we want to mark certain areas of memory accessible only to code
+   running at a certain privilege level (to be able to have processes
+   running at different privilege levels), paging is the neatest way to do
+   it.
+
+9.2 Paging in x86
+
+   Paging in x86 (chapter 4 in the Intel manual [33]) consists of a page
+   directory (PDT) that can contain references to 1024 page tables (PT),
+   each of which can point to 1024 sections of physical memory called page
+   frames (PF). Each page frame is 4096 bytes large. In a virtual (linear)
+   address, the highest 10 bits specify the offset of a page directory
+   entry (PDE) in the current PDT, the next 10 bits the offset of a page
+   table entry (PTE) within the page table pointed to by that PDE. The
+   lowest 12 bits in the address are the offset within the page frame to
+   be addressed.
+
+   All page directories, page tables and page frames need to be aligned on
+   4096 byte addresses. This makes it possible to address a PDT, PT or PF
+   with just the highest 20 bits of a 32 bit address, since the lowest 12
+   need to be zero.
+
+   The PDE and PTE structures are very similar to each other: 32 bits (4
+   bytes), where the highest 20 bits point to a PT or PF, and the lowest
+   12 bits control access rights and other configurations. 4 bytes times
+   1024 equals 4096 bytes, so a page directory and page table both fit in
+   a page frame themselves.
+
+   The translation of linear addresses to physical addresses is described
+   in the figure below.
+
+   While pages are normally 4096 bytes, it is also possible to use 4 MB
+   pages. A PDE then points directly to a 4 MB page frame, which needs to
+   be aligned on a 4 MB address boundary. The address translation is
+   almost the same as in the figure, with just the page table step
+   removed. It is possible to mix 4 MB and 4 KB pages.
+   Translating virtual addresses (linear addresses) to physical addresses.
+
+   Translating virtual addresses (linear addresses) to physical addresses.
+
+   The 20 bits pointing to the current PDT are stored in the register cr3.
+   The lower 12 bits of cr3 are used for configuration.
+
+   For more details on the paging structures, see chapter 4 in the Intel
+   manual [33]. The most interesting bits are U/S, which determines what
+   privilege levels can access this page (PL0 or PL3), and R/W, which
+   makes the memory in the page read-write or read-only.
+
+9.2.1 Identity Paging
+
+   The simplest kind of paging is when we map each virtual address onto
+   the same physical address, called identity paging. This can be done at
+   compile time by creating a page directory where each entry points to
+   its corresponding 4 MB frame. In NASM this can be done with macros and
+   commands (%rep, times and dd). It can of course also be done at
+   run-time by using ordinary assembly code instructions.
+
+9.2.2 Enabling Paging
+
+   Paging is enabled by first writing the address of a page directory to
+   cr3 and then setting bit 31 (the PG "paging-enable" bit) of cr0 to 1.
+   To use 4 MB pages, set the PSE bit (Page Size Extensions, bit 4) of
+   cr4. The following assembly code shows an example:
+    ; eax has the address of the page directory
+    mov cr3, eax
+
+    mov ebx, cr4        ; read current cr4
+    or  ebx, 0x00000010 ; set PSE
+    mov cr4, ebx        ; update cr4
+
+    mov ebx, cr0        ; read current cr0
+    or  ebx, 0x80000000 ; set PG
+    mov cr0, ebx        ; update cr0
+
+    ; now paging is enabled
+
+9.2.3 A Few Details
+
+   It is important to note that all addresses within the page directory,
+   page tables and in cr3 need to be physical addresses to the structures,
+   never virtual. This will be more relevant in later sections where we
+   dynamically update the paging structures (see the chapter [172]"User
+   Mode").
+
+   An instruction that is useful when updating a PDT or PT is invlpg.
+   It invalidates the Translation Lookaside Buffer (TLB) entry for a
+   virtual address. The TLB is a cache for translated addresses, mapping
+   virtual addresses to their corresponding physical addresses. This is only
+   required when changing a PDE or PTE that was previously mapped to
+   something else. If the PDE or PTE had previously been marked as not
+   present (bit 0 was set to 0), executing invlpg is unnecessary. Changing
+   the value of cr3 will cause all entries in the TLB to be invalidated.
+
+   An example of invalidating a TLB entry is shown below:
+    ; invalidate any TLB references to virtual address 0
+    invlpg [0]
+
+9.3 Paging and the Kernel
+
+   This section will describe how paging affects the OS kernel. We
+   encourage you to run your OS using identity paging before trying to
+   implement a more advanced paging setup, since it can be hard to debug a
+   malfunctioning page table that is set up via assembly code.
+
+9.3.1 Reasons to Not Identity Map the Kernel
+
+   If the kernel is placed at the beginning of the virtual address space -
+   that is, the virtual address space (0x00000000, "size of kernel") maps
+   to the location of the kernel in memory - there will be issues when
+   linking the user mode process code. Normally, during linking, the
+   linker assumes that the code will be loaded into the memory position
+   0x00000000. Therefore, when resolving absolute references, 0x00000000
+   will be the base address for calculating the exact position. But if the
+   kernel is mapped onto the virtual address space (0x00000000, "size of
+   kernel"), the user mode process cannot be loaded at virtual address
+   0x00000000 - it must be placed somewhere else. Therefore, the
+   assumption from the linker that the user mode process is loaded into
+   memory at position 0x00000000 is wrong. This can be corrected by using
+   a linker script which tells the linker to assume a different starting
+   address, but that is a very cumbersome solution for the users of the
+   operating system.
+
+   This also assumes that we want the kernel to be part of the user mode
+   process' address space. As we will see later, this is a nice feature,
+   since during system calls we don't have to change any paging structures
+   to get access to the kernel's code and data. The kernel pages will of
+   course require privilege level 0 for access, to prevent a user process
+   from reading or writing kernel memory.
+
+9.3.2 The Virtual Address for the Kernel
+
+   Preferably, the kernel should be placed at a very high virtual memory
+   address, for example 0xC0000000 (3 GB). The user mode process is not
+   likely to be 3 GB large, which is now the only way that it can conflict
+   with the kernel. When the kernel uses virtual addresses at 3 GB and
+   above it is called a higher-half kernel. 0xC0000000 is just an example,
+   the kernel can be placed at any address higher than 0 to get the same
+   benefits. Choosing the correct address depends on how much virtual
+   memory should be available for the kernel (it is easiest if all memory
+   above the kernel virtual address should belong to the kernel) and how
+   much virtual memory should be available for the process.
+
+   If the user mode process is larger than 3 GB, some pages will need to
+   be swapped out by the kernel. Swapping pages is not part of this book.
+
+9.3.3 Placing the Kernel at 0xC0000000
+
+   To start with, it is better to place the kernel at 0xC0100000 than
+   0xC0000000, since this makes it possible to map (0x00000000,
+   0x00100000) to (0xC0000000, 0xC0100000). This way, the entire range
+   (0x00000000, "size of kernel") of memory is mapped to the range
+   (0xC0000000, 0xC0000000 + "size of kernel").
+
+   Placing the kernel at 0xC0100000 isn't hard, but it does require some
+   thought. This is once again a linking problem. When the linker resolves
+   all absolute references in the kernel, it will assume that our kernel
+   is loaded at physical memory location 0x00100000, not 0x00000000, since
+   relocation is used in the linker script (see the section [173]"Linking
+   the kernel"). However, we want the jumps to be resolved using
+   0xC0100000 as base address, since otherwise a kernel jump will jump
+   straight into the user mode process code (remember that the user mode
+   process is loaded at virtual memory 0x00000000).
+
+   However, we can't simply tell the linker to assume that the kernel
+   starts (is loaded) at 0xC0100000, since we want it to be loaded at the
+   physical address 0x00100000. The reason for having the kernel loaded at
+   1 MB is because it can't be loaded at 0x00000000, since there is BIOS
+   and GRUB code loaded below 1 MB. Furthermore, we cannot assume that we
+   can load the kernel at 0xC0100000, since the machine might not have 3
+   GB of physical memory.
+
+   This can be solved by using both relocation (.=0xC0100000) and the AT
+   instruction in the linker script. Relocation specifies that
+   non-relative memory-references should use the relocation address
+   as base in address calculations. AT specifies where the kernel should
+   be loaded into memory. Relocation is done at link time by GNU ld [37],
+   the load address specified by AT is handled by GRUB when loading the
+   kernel, and is part of the ELF format [18].
+
+9.3.4 Higher-half Linker Script
+
+   We can modify the [174]first linker script to implement this:
+    ENTRY(loader)           /* the name of the entry symbol */
+
+    . = 0xC0100000;         /* the code should be relocated to 3GB + 1MB */
+
+    /* align at 4 KB and load at 1 MB */
+    .text ALIGN (0x1000) : AT(ADDR(.text)-0xC0000000)
+    {
+        *(.text)            /* all text sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .rodata ALIGN (0x1000) : AT(ADDR(.rodata)-0xC0000000)
+    {
+        *(.rodata*)         /* all read-only data sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .data ALIGN (0x1000) : AT(ADDR(.data)-0xC0000000)
+    {
+        *(.data)            /* all data sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .bss ALIGN (0x1000) : AT(ADDR(.bss)-0xC0000000)
+    {
+        *(COMMON)           /* all COMMON sections from all files */
+        *(.bss)             /* all bss sections from all files */
+    }
+
+9.3.5 Entering the Higher Half
+
+   When GRUB jumps to the kernel code, there is no paging table.
+   Therefore, all references to 0xC0100000 + X won't be mapped to the
+   correct physical address, and will therefore cause a general protection
+   exception (GPE) at the very best, otherwise (if the computer has more
+   than 3 GB of memory) the computer will just crash.
+
+   Therefore, assembly code that doesn't use relative jumps or relative
+   memory addressing must be used to do the following:
+     * Set up a page table.
+     * Add identity mapping for the first 4 MB of the virtual address
+       space.
+     * Add an entry for 0xC0100000 that maps to 0x00100000
+
+   If we skip the identity mapping for the first 4 MB, the CPU would
+   generate a page fault immediately after paging was enabled when trying
+   to fetch the next instruction from memory. After the table has been
+   created, a jump can be done to a label to make eip point to a virtual
+   address in the higher half:
+    ; assembly code executing at around 0x00100000
+    ; enable paging for both actual location of kernel
+    ; and its higher-half virtual location
+
+    lea ebx, [higher_half] ; load the address of the label in ebx
+    jmp ebx                ; jump to the label
+
+    higher_half:
+        ; code here executes in the higher half kernel
+        ; eip is larger than 0xC0000000
+        ; can continue kernel initialisation, calling C code, etc.
+
+   The register eip will now point to a memory location somewhere right
+   after 0xC0100000 - all the code can now execute as if it were located
+   at 0xC0100000, the higher-half. The entry mapping of the first 4 MB of
+   virtual memory to the first 4 MB of physical memory can now be removed
+   from the page table and its corresponding entry in the TLB invalidated
+   with invlpg [0].
+
+9.3.6 Running in the Higher Half
+
+   There are a few more details we must deal with when using a higher-half
+   kernel. We must be careful when using memory-mapped I/O that uses
+   specific memory locations. For example, the frame buffer is located at
+   0x000B8000, but since there is no entry in the page table for the
+   address 0x000B8000 any longer, the address 0xC00B8000 must be used,
+   since the virtual address 0xC0000000 maps to the physical address
+   0x00000000.
+
+   Any explicit references to addresses within the multiboot structure
+   need to be changed to reflect the new virtual addresses as well.
+
+   Mapping 4 MB pages for the kernel is simple, but wastes memory (unless
+   you have a really big kernel). Creating a higher-half kernel mapped in
+   as 4 KB pages saves memory but is harder to set up. Memory for the page
+   directory and one page table can be reserved in the .data section, but
+   one needs to configure the mappings from virtual to physical addresses
+   at run-time. The size of the kernel can be determined by exporting
+   labels from the linker script [37], which we'll need to do later anyway
+   when writing the page frame allocator (see the chapter [175]"Page Frame
+   Allocation").
+
+9.4 Virtual Memory Through Paging
+
+   Paging enables two things that are good for virtual memory. First, it
+   allows for fine-grained access control to memory. You can mark pages as
+   read-only, read-write, only for PL0 etc. Second, it creates the
+   illusion of contiguous memory. User mode processes, and the kernel, can
+   access memory as if it were contiguous, and the contiguous memory can
+   be extended without the need to move data around in memory. We can also
+   allow the user mode programs access to all memory below 3 GB, but
+   unless they actually use it, we don't have to assign page frames to the
+   pages. This allows processes to have code located near 0x00000000 and
+   the stack at just below 0xC0000000, and still not require more than two
+   actual pages.
+
+9.5 Further Reading
+
+     * Chapter 4 (and to some extent chapter 3) of the Intel manual [33]
+       are your definitive sources for the details about paging.
+     * Wikipedia has an article on paging:
+       [176]http://en.wikipedia.org/wiki/Paging
+     * The OSDev wiki has a page on paging:
+       [177]http://wiki.osdev.org/Paging and a tutorial for making a
+       higher-half kernel:
+       [178]http://wiki.osdev.org/Higher_Half_bare_bones
+     * Gustavo Duarte's article on how a kernel manages memory is well
+       worth a read:
+       [179]http://duartes.org/gustavo/blog/post/anatomy-of-a-program-in-m
+       emory
+     * Details on the linker command language can be found at Steve
+       Chamberlain's website [37].
+     * More details on the ELF format can be found in this presentation:
+       [180]http://flint.cs.yale.edu/cs422/doc/ELF_Format.pdf
+
+10 Page Frame Allocation
+
+   When using virtual memory, how does the OS know which parts of memory
+   are free to use? That is the role of the page frame allocator.
+
+10.1 Managing Available Memory
+
+10.1.1 How Much Memory is There?
+
+   First we need to know how much memory is available on the computer the
+   OS is running on. The easiest way to do this is to read it from the
+   multiboot structure [19] passed to us by GRUB. GRUB collects the
+   information we need about the memory - what is reserved, I/O mapped,
+   read-only etc. We must also make sure that we don't mark the part of
+   memory used by the kernel as free (since GRUB doesn't mark this memory
+   as reserved). One way to know how much memory the kernel uses is to
+   export labels at the beginning and the end of the kernel binary from
+   the linker script:
+    ENTRY(loader)           /* the name of the entry symbol */
+
+    . = 0xC0100000          /* the code should be relocated to 3 GB + 1 MB */
+
+    /* these labels get exported to the code files */
+    kernel_virtual_start = .;
+    kernel_physical_start = . - 0xC0000000;
+
+    /* align at 4 KB and load at 1 MB */
+    .text ALIGN (0x1000) : AT(ADDR(.text)-0xC0000000)
+    {
+        *(.text)            /* all text sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .rodata ALIGN (0x1000) : AT(ADDR(.rodata)-0xC0000000)
+    {
+        *(.rodata*)         /* all read-only data sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .data ALIGN (0x1000) : AT(ADDR(.data)-0xC0000000)
+    {
+        *(.data)            /* all data sections from all files */
+    }
+
+    /* align at 4 KB and load at 1 MB + . */
+    .bss ALIGN (0x1000) : AT(ADDR(.bss)-0xC0000000)
+    {
+        *(COMMON)           /* all COMMON sections from all files */
+        *(.bss)             /* all bss sections from all files */
+    }
+
+    kernel_virtual_end = .;
+    kernel_physical_end = . - 0xC0000000;
+
+   These labels can directly be read from assembly code and pushed on the
+   stack to make them available to C code:
+    extern kernel_virtual_start
+    extern kernel_virtual_end
+    extern kernel_physical_start
+    extern kernel_physical_end
+
+    ; ...
+
+    push kernel_physical_end
+    push kernel_physical_start
+    push kernel_virtual_end
+    push kernel_virtual_start
+
+    call kmain
+
+   This way we get the labels as arguments to kmain. If you want to use C
+   instead of assembly code, one way to do it is to declare the labels as
+   functions and take the addresses of these functions:
+    void kernel_virtual_start(void);
+
+    /* ... */
+
+    unsigned int vaddr = (unsigned int) &kernel_virtual_start;
+
+   If you use GRUB modules you need to make sure the memory they use is
+   marked as reserved as well.
+
+   Note that the available memory does not need to be contiguous. In the
+   first 1 MB there are several I/O-mapped memory sections, as well as
+   memory used by GRUB and the BIOS. Other parts of the memory might be
+   similarly unavailable.
+
+   It's convenient to divide the memory sections into complete page
+   frames, as we can't map part of pages into memory.
+
+10.1.2 Managing Available Memory
+
+   How do we know which page frames are in use? The page frame allocator
+   needs to keep track of which are free and which aren't. There are
+   several ways to do this: bitmaps, linked lists, trees, the Buddy System
+   (used by Linux) etc. For more information about the different
+   algorithms see the article on OSDev [38].
+
+   Bitmaps are quite easy to implement. One bit is used for each page
+   frame and one (or more) page frames are dedicated to store the bitmap.
+   (Note that this is just one way to do it, other designs might be better
+   and/or more fun to implement.)
+
+10.2 How Can We Access a Page Frame?
+
+   The page frame allocator returns the physical start address of the page
+   frame. This page frame is not mapped in - no page table points to this
+   page frame. How can we read and write data to the frame?
+
+   We need to map the page frame into virtual memory, by updating the PDT
+   and/or PT used by the kernel. What if all available page tables are
+   full? Then we can't map the page frame into memory, because we'd need a
+   new page table - which takes up an entire page frame - and to write to
+   this page frame we'd need to map its page frame... Somehow this
+   circular dependency must be broken.
+
+   One solution is to reserve a part of the first page table used by the
+   kernel (or some other higher-half page table) for temporarily mapping
+   page frames to make them accessible. If the kernel is mapped at
+   0xC0000000 (page directory entry with index 768), and 4 KB page frames
+   are used, then the kernel has at least one page table. If we assume -
+   or limit us to - a kernel of size at most 4 MB minus 4 KB we can
+   dedicate the last entry (entry 1023) of this page table for temporary
+   mappings. The virtual address of pages mapped in using the last entry
+   of the kernel's PT will be:
+    (768 << 22) | (1023 << 12) | 0x000 = 0xC03FF000
+
+   After we've temporarily mapped the page frame we want to use as a page
+   table, and set it up to map in our first page frame, we can add it to
+   the paging directory, and remove the temporary mapping.
+
+10.3 A Kernel Heap
+
+   So far we've only been able to work with fixed-size data, or directly
+   with raw memory. Now that we have a page frame allocator we can
+   implement malloc and free to use in the kernel.
+
+   Kernighan and Ritchie have an example implementation in their book
+   [8] that we can draw inspiration from. The only modification we need to
+   do is to replace calls to sbrk/brk with calls to the page frame
+   allocator when more memory is needed. We must also make sure to map the
+   page frames returned by the page frame allocator to virtual addresses.
+   A correct implementation should also return page frames to the page
+   frame allocator on call to free, whenever sufficiently large blocks of
+   memory are freed.
+
+10.4 Further reading
+
+     * The OSDev wiki page on page frame allocation:
+       [181]http://wiki.osdev.org/Page_Frame_Allocation
+
+11 User Mode
+
+   User mode is now almost within our reach, there are just a few more
+   steps required to get there. Although these steps might seem easy the
+   way they are presented in this chapter, they can be tricky to
+   implement, since there are a lot of places where small errors will
+   cause bugs that are hard to find.
+
+11.1 Segments for User Mode
+
+   To enable user mode we need to add two more segments to the GDT. They
+   are very similar to the kernel segments we added when we [182]set up
+   the GDT in the [183]chapter about segmentation:
+
+   CAPTION: The segment descriptors needed for user mode.
+
+   Index Offset Name              Address range           Type DPL
+       3   0x18 user code segment 0x00000000 - 0xFFFFFFFF RX   PL3
+       4   0x20 user data segment 0x00000000 - 0xFFFFFFFF RW   PL3
+
+   The difference is the DPL, which now allows code to execute in PL3. The
+   segments can still be used to address the entire address space, just
+   using these segments for user mode code will not protect the kernel.
+   For that we need paging.
+
+11.2 Setting Up For User Mode
+
+   There are a few things every user mode process needs:
+     * Page frames for code, data and stack. At the moment it suffices to
+       allocate one page frame for the stack and enough page frames to fit
+       the program's code. Don't worry about setting up a stack that can
+       grow and shrink at this point in time; focus on getting a basic
+       implementation working first.
+     * The binary from the GRUB module has to be copied to the page frames
+       used for the program's code.
+     * A page directory and page tables are needed to map the page frames
+       described above into memory. At least two page tables are needed,
+       because the code and data should be mapped in at 0x00000000 and
+       increasing, and the stack should start just below the kernel, at
+       0xBFFFFFFC, growing towards lower addresses. The U/S flag has to be
+       set to allow PL3 access.
+
+   It might be convenient to store this information in a struct
+   representing a process. This process struct can be dynamically
+   allocated with the kernel's malloc function.
+
+11.3 Entering User Mode
+
+   The only way to execute code with a lower privilege level than the
+   current privilege level (CPL) is to execute an iret or lret instruction
+   - interrupt return or long return, respectively.
+
+   To enter user mode we set up the stack as if the processor had raised
+   an inter-privilege level interrupt. The stack should look like the
+   following:
+    [esp + 16]  ss      ; the stack segment selector we want for user mode
+    [esp + 12]  esp     ; the user mode stack pointer
+    [esp +  8]  eflags  ; the control flags we want to use in user mode
+    [esp +  4]  cs      ; the code segment selector
+    [esp +  0]  eip     ; the instruction pointer of user mode code to execute
+
+   See the Intel manual [33], section 6.12.1, figure 6-4 for more
+   information.
+
+   The instruction iret will then read these values from the stack and
+   fill in the corresponding registers. Before we execute iret we need to
+   change to the page directory we setup for the user mode process. It is
+   important to remember that to continue executing kernel code after
+   we've switched PDT, the kernel needs to be mapped in. One way to
+   accomplish this is to have a separate PDT for the kernel, which maps
+   all data at 0xC0000000 and above, and merge it with the user PDT (which
+   only maps below 0xC0000000) when performing the switch. Remember that
+   the physical address of the PDT has to be used when setting the
+   register cr3.
+
+   The register eflags contains a set of different flags, specified in
+   section 2.3 of the Intel manual [33]. Most important for us is the
+   interrupt enable (IF) flag. The assembly code instruction sti can't be
+   used in privilege level 3 for enabling interrupts. If interrupts are
+   disabled when entering user mode, then interrupts can't be enabled once
+   user mode is entered. Setting the IF flag in the eflags entry on the
+   stack will enable interrupts in user mode, since the assembly code
+   instruction iret will set the register eflags to the corresponding
+   value on the stack.
+
+   For now, we should have interrupts disabled, as it requires a little
+   more work to get inter-privilege level interrupts to work properly (see
+   the section [184]"System calls").
+
+   The value eip on the stack should point to the entry point for the user
+   code - 0x00000000 in our case. The value esp on the stack should be
+   where the stack starts - 0xBFFFFFFC (0xC0000000 - 4).
+
+   The values cs and ss on the stack should be the segment selectors for
+   the user code and user data segments, respectively. As we saw in the
+   [185]segmentation chapter, the lowest two bits of a segment selector are
+   the RPL - the Requested Privilege Level. When using iret to enter PL3,
+   the RPL of cs and ss should be 0x3. The following code shows an
+   example:
+    USER_MODE_CODE_SEGMENT_SELECTOR equ 0x18
+    USER_MODE_DATA_SEGMENT_SELECTOR equ 0x20
+    mov cs, USER_MODE_CODE_SEGMENT_SELECTOR | 0x3
+    mov ss, USER_MODE_DATA_SEGMENT_SELECTOR | 0x3
+
+   The register ds, and the other data segment registers, should be set to
+   the same segment selector as ss. They can be set the ordinary way, with
+   the mov assembly code instruction.
+
+   We are now ready to execute iret. If everything has been set up right,
+   we should now have a kernel that can enter user mode.
+
+11.4 Using C for User Mode Programs
+
+   When C is used as the programming language for user mode programs, it
+   is important to think about the structure of the file that will be the
+   result of the compilation.
+
+   The reason we can use ELF [18] as the file format for the kernel
+   executable is because GRUB knows how to parse and interpret the ELF
+   file format. If we implemented an ELF parser, we could compile the user
+   mode programs into ELF binaries as well. We leave this as an exercise
+   for the reader.
+
+   One thing we can do to make it easier to develop user mode programs is
+   to allow the programs to be written in C, but compile them to flat
+   binaries instead of ELF binaries. In C the layout of the generated code
+   is more unpredictable and the entry point, main, might not be at offset
+   0 in the binary. One common way to work around this is to add a few
+   assembly code lines placed at offset 0 which calls main:
+    extern main
+
+    section .text
+        ; push argv
+        ; push argc
+        call main
+        ; main has returned, eax is return value
+        jmp  $    ; loop forever
+
+   If this code is saved in a file called start.s, then the following code
+   shows an example of a linker script that places these instructions first
+   in the executable (remember that start.s gets compiled to start.o):
+    OUTPUT_FORMAT("binary")    /* output flat binary */
+
+    SECTIONS
+    {
+        . = 0;                 /* relocate to address 0 */
+
+        .text ALIGN(4):
+        {
+            start.o(.text)     /* include the .text section of start.o */
+            *(.text)           /* include all other .text sections */
+        }
+
+        .data ALIGN(4):
+        {
+            *(.data)
+        }
+
+        .rodata ALIGN(4):
+        {
+            *(.rodata*)
+        }
+    }
+
+   Note: *(.text) will not include the .text section of start.o again.
+
+   With this script we can write programs in C or assembler (or any other
+   language that compiles to object files linkable with ld), and it is
+   easy to load and map for the kernel (.rodata will be mapped in as
+   writeable, though).
+
+   When we compile user programs we want the following GCC flags:
+    -m32 -nostdlib -nostdinc -fno-builtin -fno-stack-protector -nostartfiles
+    -nodefaultlibs
+
+   For linking, the following flags should be used:
+    -T link.ld -melf_i386  # emulate 32 bits ELF, the binary output is specified
+                           # in the linker script
+
+   The option -T instructs the linker to use the linker script link.ld.
+
+11.4.1 A C Library
+
+   It might now be interesting to start thinking about writing a small
+   "standard library" for your programs. Some of the functionality
+   requires [186]system calls to work, but some, such as the functions in
+   string.h, do not.
+
+11.5 Further Reading
+
+     * Gustavo Duarte has an article on privilege levels:
+       [187]http://duartes.org/gustavo/blog/post/cpu-rings-privilege-and-p
+       rotection
+
+12 File Systems
+
+   We are not required to have file systems in our operating system, but
+   it is a very usable abstraction, and it often plays a central part of
+   many operating systems, especially UNIX-like operating systems. Before
+   we start the process of supporting multiple processes and system calls
+   we might want to consider implementing a simple file system.
+
+12.1 Why a File System?
+
+   How do we specify what programs to run in our OS? Which is the first
+   program to run? How do programs output data or read input?
+
+   In UNIX-like systems, with their almost-everything-is-a-file
+   convention, these problems are solved by the file system. (It might
+   also be interesting to read a bit about the Plan 9 project, which takes
+   this idea one step further.)
+
+12.2 A Simple Read-Only File System
+
+   The simplest file system might be what we already have - one file,
+   existing only in RAM, loaded by GRUB before the kernel starts. When the
+   kernel and operating system grow, this is probably too limiting.
+
+   A file system that is slightly more advanced than just the bits of one
+   file is a file with metadata. The metadata can describe the type of the
+   file, the size of the file and so on. A utility program can be created
+   that runs at build time, adding this metadata to a file. This way, a
+   "file system in a file" can be constructed by concatenating several
+   files with metadata into one large file. The result of this technique
+   is a read-only file system that resides in memory (once GRUB has loaded
+   the file).
+
+   The program constructing the file system can traverse a directory on
+   the host system and add all subdirectories and files as part of the
+   target file system. Each object in the file system (directory or file)
+   can consist of a header and a body, where the body of a file is the
+   actual file and the body of a directory is a list of entries - names
+   and "addresses" of other files and directories.
+
+   Each object in this file system will become contiguous, so they will be
+   easy to read from memory for the kernel. All objects will also have a
+   fixed size (except for the last one, which can grow), therefore it is
+   difficult to add new files or modify existing ones.
+
+12.3 Inodes and Writable File Systems
+
+   When the need for a writable file system arises, then it is a good idea
+   to look into the concept of an inode. See the section [188]"Further
+   Reading" for recommended reading.
+
+12.4 A Virtual File System
+
+   What abstraction should be used for reading and writing to devices such
+   as the screen and the keyboard?
+
+   A virtual file system (VFS) creates an abstraction on top of the
+   concrete file systems. A VFS mainly supplies the path system and file
+   hierarchy, it delegates operations on files to the underlying file
+   systems. The original paper on VFS is succinct and well worth a read.
+   See the section [189]"Further Reading" for a reference.
+
+   With a VFS we could mount a special file system on the path /dev. This
+   file system would handle all devices such as keyboards and the console.
+   However, one could also take the traditional UNIX approach, with
+   major/minor device numbers and mknod to create special files for
+   devices. Which approach you think is the most appropriate is up to you,
+   there is no right or wrong when building abstraction layers (although
+   some abstractions turn out way more useful than others).
+
+12.5 Further Reading
+
+     * The ideas behind the Plan 9 operating system are worth taking a
+       look at: [190]http://plan9.bell-labs.com/plan9/index.html
+     * Wikipedia's page on inodes: [191]http://en.wikipedia.org/wiki/Inode
+       and the inode pointer structure:
+       [192]http://en.wikipedia.org/wiki/Inode_pointer_structure.
+     * The original paper on the concept of vnodes and a virtual file
+       system is quite interesting:
+       [193]http://www.arl.wustl.edu/~fredk/Courses/cs523/fall01/Papers/kl
+       eiman86vnodes.pdf
+     * Poul-Henning Kamp discusses the idea of a special file system for
+       /dev in
+       [194]http://static.usenix.org/publications/library/proceedings/bsdc
+       on02/full_papers/kamp/kamp_html/index.html
+
+13 System Calls
+
+   System calls are the way user-mode applications interact with the kernel
+   - to ask for resources, request operations to be performed, etc. The
+   system call API is the part of the kernel that is most exposed to the
+   users, therefore its design requires some thought.
+
+13.1 Designing System Calls
+
+   It is up to us, the kernel developers, to design the system calls that
+   application developers can use. We can draw inspiration from the POSIX
+   standards or, if they seem like too much work, just look at the ones
+   for Linux, and pick and choose. See the section [195]"Further Reading"
+   at the end of the chapter for references.
+
+13.2 Implementing System Calls
+
+   System calls are traditionally invoked with software interrupts. The
+   user applications put the appropriate values in registers or on the
+   stack and then initiate a pre-defined interrupt which transfers
+   execution to the kernel. The interrupt number used is dependent on the
+   kernel, Linux uses the number 0x80 to identify that an interrupt is
+   intended as a system call.
+
+   When system calls are executed, the current privilege level is
+   typically changed from PL3 to PL0 (if the application is running in
+   user mode). To allow this, the DPL of the entry in the IDT for the
+   system call interrupt needs to allow PL3 access.
+
+   Whenever inter-privilege level interrupts occur, the processor pushes a
+   few important registers onto the stack - the same ones we used to enter
+   user mode [196]before, see figure 6-4, section 6.12.1, in the Intel
+   manual [33]. What stack is used? The same section in [33] specifies
+   that if an interrupt leads to code executing at a numerically lower
+   privilege level, a stack switch occurs. The new values for the
+   registers ss and esp are loaded from the current Task State Segment
+   (TSS). The TSS structure is specified in figure 7-2, section 7.2.1 of
+   the Intel manual [33].
+
+   To enable system calls we need to setup a TSS before entering user
+   mode. Setting it up can be done in C by setting the ss0 and esp0 fields
+   of a "packed struct" that represents a TSS. Before loading the "packed
+   struct" into the processor, a TSS descriptor has to be added to the
+   GDT. The structure of the TSS descriptor is described in section 7.2.2
+   in [33].
+
+   You specify the current TSS segment selector by loading it into the tr
+   register with the ltr assembly code instruction. If the TSS segment
+   descriptor has index 5, and thus offset 5 * 8 = 40 = 0x28, this is the
+   value that should be loaded into the register tr.
+
+   When we entered user mode before in the chapter [197]"Entering User
+   Mode" we disabled interrupts when executing in PL3. Since system calls
+   are implemented using interrupts, interrupts must be enabled in user
+   mode. Setting the IF flag bit in the eflags value on the stack will
+   make iret enable interrupts (since the eflags value on the stack will
+   be loaded into the eflags register by the assembly code instruction
+   iret).
+
+13.3 Further Reading
+
+     * The Wikipedia page on POSIX, with links to the specifications:
+       [198]http://en.wikipedia.org/wiki/POSIX
+     * A list of system calls used in Linux:
+       [199]http://bluemaster.iu.hio.no/edu/dark/lin-asm/syscalls.html
+     * The Wikipedia page on system calls:
+       [200]http://en.wikipedia.org/wiki/System_call
+     * The Intel manual [33] sections on interrupts (chapter 6) and TSS
+       (chapter 7) are where you get all the details you need.
+
+14 Multitasking
+
+   How do you make multiple processes appear to run at the same time?
+   Today, this question has two answers:
+     * With the availability of multi-core processors, or on systems with
+       multiple processors, two processes can actually run at the same
+       time by running two processes on different cores or processors.
+     * Fake it. That is, switch rapidly (faster than a human can notice)
+       between the processes. At any given moment there is only one
+       process executing, but the rapid switching gives the impression
+       that they are running "at the same time".
+
+   Since the operating system created in this book does not support
+   multi-core processors or multiple processors the only option is to fake
+   it. The part of the operating system responsible for rapidly switching
+   between the processes is called the scheduling algorithm.
+
+14.1 Creating New Processes
+
+   Creating new processes is usually done with two different system calls:
+   fork and exec. fork creates an exact copy of the currently running
+   process, while exec replaces the current process with one that is
+   specified by a path to the location of a program in the file system. Of
+   these two we recommend that you start implementing exec, since this
+   system call will do almost exactly the same steps as described in the
+   section [201]"Setting up for user mode" in the chapter [202]"User
+   Mode".
+
+14.2 Cooperative Scheduling with Yielding
+
+   The easiest way to achieve rapid switching between processes is if the
+   processes themselves are responsible for the switching. The processes
+   run for a while and then tell the OS (via a system call) that it can
+   now switch to another process. Giving up the control of CPU to another
+   process is called yielding and when the processes themselves are
+   responsible for the scheduling it's called cooperative scheduling,
+   since all the processes must cooperate with each other.
+
+   When a process yields the process' entire state must be saved (all the
+   registers), preferably on the kernel heap in a structure that
+   represents a process. When changing to a new process all the registers
+   must be restored from the saved values.
+
+   Scheduling can be implemented by keeping a list of which processes are
+   running. The system call yield should then run the next process in the
+   list and put the current one last (other schemes are possible, but this
+   is a simple one).
+
+   The transfer of control to the new process is done via the iret
+   assembly code instruction in exactly the same way as explained in the
+   section [203]"Entering user mode" in the chapter [204]"User Mode".
+
+   We strongly recommend that you start to implement support for multiple
+   processes by implementing cooperative scheduling. We further recommend
+   that you have a working solution for all of exec, fork and yield before
+   implementing preemptive scheduling. Since cooperative scheduling is
+   deterministic, it is much easier to debug than preemptive scheduling.
+
+14.3 Preemptive Scheduling with Interrupts
+
+   Instead of letting the processes themselves manage when to change to
+   another process the OS can switch processes automatically after a short
+   period of time. The OS can set up the programmable interval timer (PIT)
+   to raise an interrupt after a short period of time, for example 20 ms.
+   In the interrupt handler for the PIT interrupt the OS will change the
+   running process to a new one. This way the processes themselves don't
+   need to worry about scheduling. This kind of scheduling is called
+   preemptive scheduling.
+
+14.3.1 Programmable Interval Timer
+
+   To be able to do preemptive scheduling the PIT must first be configured
+   to raise interrupts every x milliseconds, where x should be
+   configurable.
+
+   The configuration of the PIT is very similar to the configuration of
+   other hardware devices: a byte is sent to an I/O port. The command port
+   of the PIT is 0x43. To read about all the configuration options, see
+   the article about the PIT on OSDev [39]. We use the following options:
+     * Raise interrupts (use channel 0)
+     * Send the divider as low byte then high byte (see next section for
+       an explanation)
+     * Use a square wave
+     * Use binary mode
+
+   This results in the configuration byte 00110110.
+
+   Setting the interval for how often interrupts are to be raised is done
+   via a divider, the same way as for the serial port. Instead of sending
+   the PIT a value (e.g. in milliseconds) that says how often an interrupt
+   should be raised you send the divider. The PIT operates at 1193182 Hz
+   as default. Sending the divider 10 results in the PIT running at
+   1193182 / 10 = 119318 Hz. The divider can only be 16 bits, so it is
+   only possible to configure the timer's frequency between 1193182 Hz and
+   1193182 / 65535 = 18.2 Hz. We recommend that you create a function that
+   takes an interval in milliseconds and converts it to the correct
+   divider.
+
+   The divider is sent to the channel 0 data I/O port of the PIT, but
+   since only one byte can be sent at a time, the lowest 8 bits of the
+   divider have to be sent first, then the highest 8 bits of the divider can
+   be sent. The channel 0 data I/O port is located at 0x40. Again, see the
+   article on OSDev [39] for more details.
+
+14.3.2 Separate Kernel Stacks for Processes
+
+   If all processes use the same kernel stack (the stack exposed by the
+   TSS) there will be trouble if a process is interrupted while still in
+   kernel mode. The process that is being switched to will now use the
+   same kernel stack and will overwrite what the previous process has
+   written on the stack (remember that the TSS data structure points to the
+   beginning of the stack).
+
+   To solve this problem every process should have its own kernel stack,
+   the same way that each process has its own user mode stack. When
+   switching process the TSS must be updated to point to the new process'
+   kernel stack.
+
+14.3.3 Difficulties with Preemptive Scheduling
+
+   When using preemptive scheduling one problem arises that doesn't exist
+   with cooperative scheduling. With cooperative scheduling every time a
+   process yields, it must be in user mode (privilege level 3), since
+   yield is a system call. With preemptive scheduling, the processes can
+   be interrupted in either user mode or kernel mode (privilege level 0),
+   since the process itself does not control when it gets interrupted.
+
+   Interrupting a process in kernel mode is a little bit different than
+   interrupting a process in user mode, due to the way the CPU sets up the
+   stack at interrupts. If a privilege level change occurred (the process
+   was interrupted in user mode) the CPU will push the value of the
+   process' ss and esp registers on the stack. If no privilege level change
+   occurs (the process was interrupted in kernel mode) the CPU won't push
+   the esp register on the stack. Furthermore, if there was no privilege
+   level change, the CPU won't change stack to the one defined in the TSS.
+
+   This problem is solved by calculating what the value of esp was before
+   the interrupt. Since you know that the CPU pushes 3 things on the stack
+   when no privilege change happens and you know how much you have pushed
+   on the stack, you can calculate what the value of esp was at the time
+   of the interrupt. This is possible since the CPU won't change stacks if
+   there is no privilege level change, so the content of esp will be the
+   same as at the time of the interrupt.
+
+   To further complicate things, one must think of how to handle the case
+   when switching to a new process that should be running in kernel mode.
+   Since iret is being used without a privilege level change the CPU won't
+   update the value of esp with the one placed on the stack - you must
+   update esp yourself.
+
+14.4 Further Reading
+
+     * For more information about different scheduling algorithms, see
+       [205]http://wiki.osdev.org/Scheduling_Algorithms
+
+14.4 References
+
+   [1] Andrew Tanenbaum, 2007. Modern operating systems, 3rd edition.
+   Prentice Hall, Inc.,
+
+   [2] The royal institute of technology, [206]http://www.kth.se,
+
+   [3] Wikipedia, Hexadecimal,
+   [207]http://en.wikipedia.org/wiki/Hexadecimal,
+
+   [4] OSDev, OSDev, [208]http://wiki.osdev.org/Main_Page,
+
+   [5] James Molloy, James m's kernel development tutorial,
+   [209]http://www.jamesmolloy.co.uk/tutorial_html/,
+
+   [6] Canonical Ltd, Ubuntu, [210]http://www.ubuntu.com/,
+
+   [7] Oracle, Oracle VM VirtualBox, [211]http://www.virtualbox.org/,
+
+   [8] Dennis M. Ritchie Brian W. Kernighan, 1988. The c programming
+   language, second edition. Prentice Hall, Inc.,
+
+   [9] Wikipedia, C (programming language),
+   [212]http://en.wikipedia.org/wiki/C_(programming_language),
+
+   [10] Free Software Foundation, GCC, the GNU compiler collection,
+   [213]http://gcc.gnu.org/,
+
+   [11] NASM, NASM: The netwide assembler, [214]http://www.nasm.us/,
+
+   [12] Wikipedia, Bash,
+   [215]http://en.wikipedia.org/wiki/Bash_%28Unix_shell%29,
+
+   [13] Free Software Foundation, GNU make,
+   [216]http://www.gnu.org/software/make/,
+
+   [14] Volker Ruppert, bochs: The open source IA-32 emulation project,
+   [217]http://bochs.sourceforge.net/,
+
+   [15] QEMU, QEMU, [218]http://wiki.qemu.org/Main_Page,
+
+   [16] Wikipedia, BIOS, [219]https://en.wikipedia.org/wiki/BIOS,
+
+   [17] Free Software Foundation, GNU GRUB,
+   [220]http://www.gnu.org/software/grub/,
+
+   [18] Wikipedia, Executable and linkable format,
+   [221]http://en.wikipedia.org/wiki/Executable_and_Linkable_Format,
+
+   [19] Free Software Foundation, Multiboot specification version 0.6.96,
+   [222]http://www.gnu.org/software/grub/manual/multiboot/multiboot.html,
+
+   [20] GNU, GNU binutils, [223]http://www.gnu.org/software/binutils/,
+
+   [21] Lars Nodeen, Bug #426419: configure: error: GRUB requires a
+   working absolute objcopy,
+   [224]https://bugs.launchpad.net/ubuntu/+source/grub/+bug/426419,
+
+   [22] Wikipedia, ISO image, [225]http://en.wikipedia.org/wiki/ISO_image,
+
+   [23] Bochs, bochsrc,
+   [226]http://bochs.sourceforge.net/doc/docbook/user/bochsrc.html,
+
+   [24] NASM, RESB and friends: Declaring uninitialized data,
+   [227]http://www.nasm.us/doc/nasmdoc3.htm,
+
+   [25] Wikipedia, x86 calling conventions,
+   [228]http://en.wikipedia.org/wiki/X86_calling_conventions,
+
+   [26] Wikipedia, Framebuffer,
+   [229]http://en.wikipedia.org/wiki/Framebuffer,
+
+   [27] Wikipedia, VGA-compatible text mode,
+   [230]http://en.wikipedia.org/wiki/VGA-compatible_text_mode,
+
+   [28] Wikipedia, ASCII, [231]https://en.wikipedia.org/wiki/Ascii,
+
+   [29] OSDev, VGA hardware, [232]http://wiki.osdev.org/VGA_Hardware,
+
+   [30] Wikipedia, Serial port,
+   [233]http://en.wikipedia.org/wiki/Serial_port,
+
+   [31] OSDev, Serial ports, [234]http://wiki.osdev.org/Serial_ports,
+
+   [32] WikiBooks, Serial programming/8250 UART programming,
+   [235]http://en.wikibooks.org/wiki/Serial_Programming/
+   8250_UART_Programming,
+
+   [33] Intel, Intel 64 and IA-32 architectures software developer's
+   manual vol. 3A, [236]http://www.intel.com/content/
+   www/us/en/architecture-and-technology/64-ia-32-architectures-software-d
+   eveloper-vol-3a-part-1-manual.html/,
+
+   [34] NASM, Multi-line macros,
+   [237]http://www.nasm.us/doc/nasmdoc4.html#section-4.3,
+
+   [35] SIGOPS, i386 interrupt handling,
+   [238]http://www.acm.uiuc.edu/sigops/roll_your_own/i386/irq.html,
+
+   [36] Andries Brouwer, Keyboard scancodes, [239]http://www.win.tue.nl/,
+
+   [37] Steve Chamberlain, Using ld, the GNU linker,
+   [240]http://www.math.utah.edu/docs/info/ld_toc.html,
+
+   [38] OSDev, Page frame allocation,
+   [241]http://wiki.osdev.org/Page_Frame_Allocation,
+
+   [39] OSDev, Programmable interval timer,
+   [242]http://wiki.osdev.org/Programmable_Interval_Timer,
+     __________________________________________________________________
+
+    1. The bootloader must fit into the master boot record (MBR) boot
+       sector of a hard drive, which is only 512 bytes large.[243]↩
+
+References
+
+   1. https://github.com/littleosbook/littleosbook/
+   2. https://littleosbook.github.io/book.pdf
+   3. https://littleosbook.github.io/#introduction
+   4. https://littleosbook.github.io/#about-the-book
+   5. https://littleosbook.github.io/#the-reader
+   6. https://littleosbook.github.io/#credits-thanks-and-acknowledgements
+   7. https://littleosbook.github.io/#contributors
+   8. https://littleosbook.github.io/#changes-and-corrections
+   9. https://littleosbook.github.io/#issues-and-where-to-get-help
+  10. https://littleosbook.github.io/#license
+  11. https://littleosbook.github.io/#first-steps
+  12. https://littleosbook.github.io/#tools
+  13. https://littleosbook.github.io/#quick-setup
+  14. https://littleosbook.github.io/#programming-languages
+  15. https://littleosbook.github.io/#host-operating-system
+  16. https://littleosbook.github.io/#build-system
+  17. https://littleosbook.github.io/#virtual-machine
+  18. https://littleosbook.github.io/#booting
+  19. https://littleosbook.github.io/#bios
+  20. https://littleosbook.github.io/#the-bootloader
+  21. https://littleosbook.github.io/#the-operating-system
+  22. https://littleosbook.github.io/#hello-cafebabe
+  23. https://littleosbook.github.io/#compiling-the-operating-system
+  24. https://littleosbook.github.io/#linking-the-kernel
+  25. https://littleosbook.github.io/#obtaining-grub
+  26. https://littleosbook.github.io/#building-an-iso-image
+  27. https://littleosbook.github.io/#running-bochs
+  28. https://littleosbook.github.io/#further-reading
+  29. https://littleosbook.github.io/#getting-to-c
+  30. https://littleosbook.github.io/#setting-up-a-stack
+  31. https://littleosbook.github.io/#calling-c-code-from-assembly
+  32. https://littleosbook.github.io/#packing-structs
+  33. https://littleosbook.github.io/#compiling-c-code
+  34. https://littleosbook.github.io/#build-tools
+  35. https://littleosbook.github.io/#further-reading-1
+  36. https://littleosbook.github.io/#output
+  37. https://littleosbook.github.io/#interacting-with-the-hardware
+  38. https://littleosbook.github.io/#the-framebuffer
+  39. https://littleosbook.github.io/#writing-text
+  40. https://littleosbook.github.io/#moving-the-cursor
+  41. https://littleosbook.github.io/#the-driver
+  42. https://littleosbook.github.io/#the-serial-ports
+  43. https://littleosbook.github.io/#configuring-the-serial-port
+  44. https://littleosbook.github.io/#configuring-the-line
+  45. https://littleosbook.github.io/#configuring-the-buffers
+  46. https://littleosbook.github.io/#configuring-the-modem
+  47. https://littleosbook.github.io/#writing-data-to-the-serial-port
+  48. https://littleosbook.github.io/#configuring-bochs
+  49. https://littleosbook.github.io/#the-driver-1
+  50. https://littleosbook.github.io/#further-reading-2
+  51. https://littleosbook.github.io/#segmentation
+  52. https://littleosbook.github.io/#accessing-memory
+  53. https://littleosbook.github.io/#the-global-descriptor-table-gdt
+  54. https://littleosbook.github.io/#loading-the-gdt
+  55. https://littleosbook.github.io/#further-reading-3
+  56. https://littleosbook.github.io/#interrupts-and-input
+  57. https://littleosbook.github.io/#interrupts-handlers
+  58. https://littleosbook.github.io/#creating-an-entry-in-the-idt
+  59. https://littleosbook.github.io/#handling-an-interrupt
+  60. https://littleosbook.github.io/#creating-a-generic-interrupt-handler
+  61. https://littleosbook.github.io/#loading-the-idt
+  62. https://littleosbook.github.io/#programmable-interrupt-controller-pic
+  63. https://littleosbook.github.io/#reading-input-from-the-keyboard
+  64. https://littleosbook.github.io/#further-reading-4
+  65. https://littleosbook.github.io/#the-road-to-user-mode
+  66. https://littleosbook.github.io/#loading-an-external-program
+  67. https://littleosbook.github.io/#grub-modules
+  68. https://littleosbook.github.io/#executing-a-program
+  69. https://littleosbook.github.io/#a-very-simple-program
+  70. https://littleosbook.github.io/#compiling
+  71. https://littleosbook.github.io/#finding-the-program-in-memory
+  72. https://littleosbook.github.io/#jumping-to-the-code
+  73. https://littleosbook.github.io/#the-beginning-of-user-mode
+  74. https://littleosbook.github.io/#a-short-introduction-to-virtual-memory
+  75. https://littleosbook.github.io/#virtual-memory-through-segmentation
+  76. https://littleosbook.github.io/#further-reading-5
+  77. https://littleosbook.github.io/#paging
+  78. https://littleosbook.github.io/#why-paging
+  79. https://littleosbook.github.io/#paging-in-x86
+  80. https://littleosbook.github.io/#identity-paging
+  81. https://littleosbook.github.io/#enabling-paging
+  82. https://littleosbook.github.io/#a-few-details
+  83. https://littleosbook.github.io/#paging-and-the-kernel
+  84. https://littleosbook.github.io/#reasons-to-not-identity-map-the-kernel
+  85. https://littleosbook.github.io/#the-virtual-address-for-the-kernel
+  86. https://littleosbook.github.io/#placing-the-kernel-at-0xc0000000
+  87. https://littleosbook.github.io/#higher-half-linker-script
+  88. https://littleosbook.github.io/#entering-the-higher-half
+  89. https://littleosbook.github.io/#running-in-the-higher-half
+  90. https://littleosbook.github.io/#virtual-memory-through-paging
+  91. https://littleosbook.github.io/#further-reading-6
+  92. https://littleosbook.github.io/#page-frame-allocation
+  93. https://littleosbook.github.io/#managing-available-memory
+  94. https://littleosbook.github.io/#how-much-memory-is-there
+  95. https://littleosbook.github.io/#managing-available-memory-1
+  96. https://littleosbook.github.io/#how-can-we-access-a-page-frame
+  97. https://littleosbook.github.io/#a-kernel-heap
+  98. https://littleosbook.github.io/#further-reading-7
+  99. https://littleosbook.github.io/#user-mode
+ 100. https://littleosbook.github.io/#segments-for-user-mode
+ 101. https://littleosbook.github.io/#setting-up-for-user-mode
+ 102. https://littleosbook.github.io/#entering-user-mode
+ 103. https://littleosbook.github.io/#using-c-for-user-mode-programs
+ 104. https://littleosbook.github.io/#a-c-library
+ 105. https://littleosbook.github.io/#further-reading-8
+ 106. https://littleosbook.github.io/#file-systems
+ 107. https://littleosbook.github.io/#why-a-file-system
+ 108. https://littleosbook.github.io/#a-simple-read-only-file-system
+ 109. https://littleosbook.github.io/#inodes-and-writable-file-systems
+ 110. https://littleosbook.github.io/#a-virtual-file-system
+ 111. https://littleosbook.github.io/#further-reading-9
+ 112. https://littleosbook.github.io/#system-calls
+ 113. https://littleosbook.github.io/#designing-system-calls
+ 114. https://littleosbook.github.io/#implementing-system-calls
+ 115. https://littleosbook.github.io/#further-reading-10
+ 116. https://littleosbook.github.io/#multitasking
+ 117. https://littleosbook.github.io/#creating-new-processes
+ 118. https://littleosbook.github.io/#cooperative-scheduling-with-yielding
+ 119. https://littleosbook.github.io/#preemptive-scheduling-with-interrupts
+ 120. https://littleosbook.github.io/#programmable-interval-timer
+ 121. https://littleosbook.github.io/#separate-kernel-stacks-for-processes
+ 122. https://littleosbook.github.io/#difficulties-with-preemptive-scheduling
+ 123. https://littleosbook.github.io/#further-reading-11
+ 124. https://littleosbook.github.io/#first-steps
+ 125. https://littleosbook.github.io/#getting-to-c
+ 126. https://littleosbook.github.io/#output
+ 127. https://littleosbook.github.io/#segmentation
+ 128. https://littleosbook.github.io/#interrupts-and-input
+ 129. https://littleosbook.github.io/#the-road-to-user-mode
+ 130. https://littleosbook.github.io/#a-short-introduction-to-virtual-memory
+ 131. https://littleosbook.github.io/#paging
+ 132. https://littleosbook.github.io/#page-frame-allocation
+ 133. https://littleosbook.github.io/#user-mode
+ 134. https://littleosbook.github.io/#file-systems
+ 135. https://littleosbook.github.io/#system-calls
+ 136. https://littleosbook.github.io/#multitasking
+ 137. http://progit.org/
+ 138. https://github.com/alexschneider
+ 139. https://github.com/Avidanborisov
+ 140. https://github.com/nirs
+ 141. https://github.com/kedarmhaswade
+ 142. https://github.com/vamanea
+ 143. https://github.com/ansjob
+ 144. https://github.com/littleosbook/littleosbook/issues
+ 145. http://creativecommons.org/licenses/by-nc-sa/3.0/us/
+ 146. https://littleosbook.github.io/#fn1
+ 147. https://littleosbook.github.io/#getting-to-c
+ 148. ftp://alpha.gnu.org/gnu/grub/grub-0.97.tar.gz
+ 149. http://littleosbook.github.com/files/stage2_eltorito
+ 150. http://duartes.org/gustavo/blog/post/how-computers-boot-up
+ 151. http://duartes.org/gustavo/blog/post/kernel-boot-process
+ 152. http://wiki.osdev.org/Boot_Sequence
+ 153. https://littleosbook.github.io/#the-framebuffer
+ 154. http://en.wikibooks.org/wiki/Serial_Programming/8250_UART_Programming#UART_Registers
+ 155. http://wiki.osdev.org/Serial_ports
+ 156. https://littleosbook.github.io/#paging
+ 157. https://littleosbook.github.io/#packing-structs
+ 158. http://wiki.osdev.org/Segmentation
+ 159. http://en.wikipedia.org/wiki/X86_memory_segmentation
+ 160. https://littleosbook.github.io/#getting-to-c
+ 161. http://wiki.osdev.org/Interrupts
+ 162. https://littleosbook.github.io/#scheduling
+ 163. https://littleosbook.github.io/#paging
+ 164. http://www.gnu.org/software/grub/manual/multiboot/html_node/multiboot.h.html
+ 165. https://littleosbook.github.io/#segmentation
+ 166. https://littleosbook.github.io/#paging
+ 167. https://littleosbook.github.io/#page-frame-allocation
+ 168. https://littleosbook.github.io/#paging
+ 169. https://littleosbook.github.io/#page-frame-allocation
+ 170. http://lwn.net/Articles/253361/
+ 171. http://duartes.org/gustavo/blog/post/memory-translation-and-segmentation
+ 172. https://littleosbook.github.io/#user-mode
+ 173. https://littleosbook.github.io/#linking-the-kernel
+ 174. https://littleosbook.github.io/#linking-the-kernel
+ 175. https://littleosbook.github.io/#page-frame-allocation
+ 176. http://en.wikipedia.org/wiki/Paging
+ 177. http://wiki.osdev.org/Paging
+ 178. http://wiki.osdev.org/Higher_Half_bare_bones
+ 179. http://duartes.org/gustavo/blog/post/anatomy-of-a-program-in-memory
+ 180. http://flint.cs.yale.edu/cs422/doc/ELF_Format.pdf
+ 181. http://wiki.osdev.org/Page_Frame_Allocation
+ 182. https://littleosbook.github.io/#the-global-descriptor-table-gdt
+ 183. https://littleosbook.github.io/#segmentation
+ 184. https://littleosbook.github.io/#system-calls
+ 185. https://littleosbook.github.io/#creating-and-loading-the-gdt
+ 186. https://littleosbook.github.io/#system-calls
+ 187. http://duartes.org/gustavo/blog/post/cpu-rings-privilege-and-protection
+ 188. https://littleosbook.github.io/#further-reading-6
+ 189. https://littleosbook.github.io/#further-reading-6
+ 190. http://plan9.bell-labs.com/plan9/index.html
+ 191. http://en.wikipedia.org/wiki/Inode
+ 192. http://en.wikipedia.org/wiki/Inode_pointer_structure
+ 193. http://www.arl.wustl.edu/~fredk/Courses/cs523/fall01/Papers/kleiman86vnodes.pdf
+ 194. http://static.usenix.org/publications/library/proceedings/bsdcon02/full_papers/kamp/kamp_html/index.html
+ 195. https://littleosbook.github.io/#further-reading-7
+ 196. https://littleosbook.github.io/#user-mode
+ 197. https://littleosbook.github.io/#entering-user-mode
+ 198. http://en.wikipedia.org/wiki/POSIX
+ 199. http://bluemaster.iu.hio.no/edu/dark/lin-asm/syscalls.html
+ 200. http://en.wikipedia.org/wiki/System_call
+ 201. https://littleosbook.github.io/#setting-up-for-user-mode
+ 202. https://littleosbook.github.io/#user-mode
+ 203. https://littleosbook.github.io/#entering-user-mode
+ 204. https://littleosbook.github.io/#user-mode
+ 205. http://wiki.osdev.org/Scheduling_Algorithms
+ 206. http://www.kth.se/
+ 207. http://en.wikipedia.org/wiki/Hexadecimal
+ 208. http://wiki.osdev.org/Main_Page
+ 209. http://www.jamesmolloy.co.uk/tutorial_html/
+ 210. http://www.ubuntu.com/
+ 211. http://www.virtualbox.org/
+ 212. http://en.wikipedia.org/wiki/C_(programming_language)
+ 213. http://gcc.gnu.org/
+ 214. http://www.nasm.us/
+ 215. http://en.wikipedia.org/wiki/Bash_%28Unix_shell%29
+ 216. http://www.gnu.org/software/make/
+ 217. http://bochs.sourceforge.net/
+ 218. http://wiki.qemu.org/Main_Page
+ 219. https://en.wikipedia.org/wiki/BIOS
+ 220. http://www.gnu.org/software/grub/
+ 221. http://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+ 222. http://www.gnu.org/software/grub/manual/multiboot/multiboot.html
+ 223. http://www.gnu.org/software/binutils/
+ 224. https://bugs.launchpad.net/ubuntu/+source/grub/+bug/426419
+ 225. http://en.wikipedia.org/wiki/ISO_image
+ 226. http://bochs.sourceforge.net/doc/docbook/user/bochsrc.html
+ 227. http://www.nasm.us/doc/nasmdoc3.htm
+ 228. http://en.wikipedia.org/wiki/X86_calling_conventions
+ 229. http://en.wikipedia.org/wiki/Framebuffer
+ 230. http://en.wikipedia.org/wiki/VGA-compatible_text_mode
+ 231. https://en.wikipedia.org/wiki/Ascii
+ 232. http://wiki.osdev.org/VGA_Hardware
+ 233. http://en.wikipedia.org/wiki/Serial_port
+ 234. http://wiki.osdev.org/Serial_ports
+ 235. http://en.wikibooks.org/wiki/Serial_Programming/8250_UART_Programming
+ 236. http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-3a-part-1-manual.html/
+ 237. http://www.nasm.us/doc/nasmdoc4.html#section-4.3
+ 238. http://www.acm.uiuc.edu/sigops/roll_your_own/i386/irq.html
+ 239. http://www.win.tue.nl/
+ 240. http://www.math.utah.edu/docs/info/ld_toc.html
+ 241. http://wiki.osdev.org/Page_Frame_Allocation
+ 242. http://wiki.osdev.org/Programmable_Interval_Timer
+ 243. https://littleosbook.github.io/#fnref1
diff --git a/doc/os.phil-opp.com_vga-text-mode.txt b/doc/os.phil-opp.com_vga-text-mode.txt
new file mode 100644
index 0000000..466153f
--- /dev/null
+++ b/doc/os.phil-opp.com_vga-text-mode.txt
@@ -0,0 +1,983 @@
+   #[1]RSS feed for os.phil-opp.com
+
+[2]Writing an OS in Rust
+
+   Philipp Oppermann's blog
+
+   [3]« All Posts
+
+Table of Contents
+
+    1. [4]The VGA Text Buffer
+    2. [5]A Rust Module
+         1. [6]Colors
+         2. [7]Text Buffer
+         3. [8]Printing
+         4. [9]Volatile
+         5. [10]Formatting Macros
+         6. [11]Newlines
+    3. [12]A Global Interface
+         1. [13]Lazy Statics
+         2. [14]Spinlocks
+         3. [15]Safety
+         4. [16]A println Macro
+         5. [17]Hello World using println
+         6. [18]Printing Panic Messages
+    4. [19]Summary
+    5. [20]What's next?
+    6. [21]Comments
+
+VGA Text Mode
+
+   Feb 26, 2018
+
+   The [22]VGA text mode is a simple way to print text to the screen. In
+   this post, we create an interface that makes its usage safe and simple
+   by encapsulating all unsafety in a separate module. We also implement
+   support for Rust's [23]formatting macros.
+
+   This blog is openly developed on [24]GitHub. If you have any problems
+   or questions, please open an issue there. You can also leave comments
+   [25]at the bottom. The complete source code for this post can be found
+   in the [26]post-03 branch.
+   Table of Contents
+     * [27]The VGA Text Buffer
+     * [28]A Rust Module
+          + [29]Colors
+          + [30]Text Buffer
+          + [31]Printing
+          + [32]Volatile
+          + [33]Formatting Macros
+          + [34]Newlines
+     * [35]A Global Interface
+          + [36]Lazy Statics
+          + [37]Spinlocks
+          + [38]Safety
+          + [39]A println Macro
+          + [40]Hello World using println
+          + [41]Printing Panic Messages
+     * [42]Summary
+     * [43]What's next?
+     * [44]Comments
+
+The VGA Text Buffer
+
+   To print a character to the screen in VGA text mode, one has to write
+   it to the text buffer of the VGA hardware. The VGA text buffer is a
+   two-dimensional array with typically 25 rows and 80 columns, which is
+   directly rendered to the screen. Each array entry describes a single
+   screen character through the following format:
+   Bit(s)      Value
+   0-7    ASCII code point
+   8-11   Foreground color
+   12-14  Background color
+   15     Blink
+
+   The first byte represents the character that should be printed in the
+   [45]ASCII encoding. To be more specific, it isn't exactly ASCII, but a
+   character set named [46]code page 437 with some additional characters
+   and slight modifications. For simplicity, we will proceed to call it an
+   ASCII character in this post.
+
+   The second byte defines how the character is displayed. The first four
+   bits define the foreground color, the next three bits the background
+   color, and the last bit whether the character should blink. The
+   following colors are available:
+   Number   Color    Number + Bright Bit Bright Color
+   0x0    Black      0x8                 Dark Gray
+   0x1    Blue       0x9                 Light Blue
+   0x2    Green      0xa                 Light Green
+   0x3    Cyan       0xb                 Light Cyan
+   0x4    Red        0xc                 Light Red
+   0x5    Magenta    0xd                 Pink
+   0x6    Brown      0xe                 Yellow
+   0x7    Light Gray 0xf                 White
+
+   Bit 4 is the bright bit, which turns, for example, blue into light
+   blue. For the background color, this bit is repurposed as the blink
+   bit.
+
+   The VGA text buffer is accessible via [47]memory-mapped I/O to the
+   address 0xb8000. This means that reads and writes to that address don't
+   access the RAM but directly access the text buffer on the VGA hardware.
+   This means we can read and write it through normal memory operations to
+   that address.
+
+   Note that memory-mapped hardware might not support all normal RAM
+   operations. For example, a device could only support byte-wise reads
+   and return junk when a u64 is read. Fortunately, the text buffer
+   [48]supports normal reads and writes, so we don't have to treat it in a
+   special way.
+
+A Rust Module
+
+   Now that we know how the VGA buffer works, we can create a Rust module
+   to handle printing:
+//in src/main.rs
+mod vga_buffer;
+
+   For the content of this module, we create a new src/vga_buffer.rs file.
+   All of the code below goes into our new module (unless specified
+   otherwise).
+
+Colors
+
+   First, we represent the different colors using an enum:
+// in src/vga_buffer.rs
+
+#[allow(dead_code)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+pub enum Color {
+    Black = 0,
+    Blue = 1,
+    Green = 2,
+    Cyan = 3,
+    Red = 4,
+    Magenta = 5,
+    Brown = 6,
+    LightGray = 7,
+    DarkGray = 8,
+    LightBlue = 9,
+    LightGreen = 10,
+    LightCyan = 11,
+    LightRed = 12,
+    Pink = 13,
+    Yellow = 14,
+    White = 15,
+}
+
+   We use a [49]C-like enum here to explicitly specify the number for each
+   color. Because of the repr(u8) attribute, each enum variant is stored
+   as a u8. Actually 4 bits would be sufficient, but Rust doesn't have a
+   u4 type.
+
+   Normally the compiler would issue a warning for each unused variant. By
+   using the #[allow(dead_code)] attribute, we disable these warnings for
+   the Color enum.
+
+   By [50]deriving the [51]Copy, [52]Clone, [53]Debug, [54]PartialEq, and
+   [55]Eq traits, we enable [56]copy semantics for the type and make it
+   printable and comparable.
+
+   To represent a full color code that specifies foreground and background
+   color, we create a [57]newtype on top of u8:
+// in src/vga_buffer.rs
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(transparent)]
+struct ColorCode(u8);
+
+impl ColorCode {
+    fn new(foreground: Color, background: Color) -> ColorCode {
+        ColorCode((background as u8) << 4 | (foreground as u8))
+    }
+}
+
+   The ColorCode struct contains the full color byte, containing
+   foreground and background color. Like before, we derive the Copy and
+   Debug traits for it. To ensure that the ColorCode has the exact same
+   data layout as a u8, we use the [58]repr(transparent) attribute.
+
+Text Buffer
+
+   Now we can add structures to represent a screen character and the text
+   buffer:
+// in src/vga_buffer.rs
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(C)]
+struct ScreenChar {
+    ascii_character: u8,
+    color_code: ColorCode,
+}
+
+const BUFFER_HEIGHT: usize = 25;
+const BUFFER_WIDTH: usize = 80;
+
+#[repr(transparent)]
+struct Buffer {
+    chars: [[ScreenChar; BUFFER_WIDTH]; BUFFER_HEIGHT],
+}
+
+   Since the field ordering in default structs is undefined in Rust, we
+   need the [59]repr(C) attribute. It guarantees that the struct's fields
+   are laid out exactly like in a C struct and thus guarantees the correct
+   field ordering. For the Buffer struct, we use [60]repr(transparent)
+   again to ensure that it has the same memory layout as its single field.
+
+   To actually write to screen, we now create a writer type:
+// in src/vga_buffer.rs
+
+pub struct Writer {
+    column_position: usize,
+    color_code: ColorCode,
+    buffer: &'static mut Buffer,
+}
+
+   The writer will always write to the last line and shift lines up when a
+   line is full (or on \n). The column_position field keeps track of the
+   current position in the last row. The current foreground and background
+   colors are specified by color_code and a reference to the VGA buffer is
+   stored in buffer. Note that we need an [61]explicit lifetime here to
+   tell the compiler how long the reference is valid. The [62]'static
+   lifetime specifies that the reference is valid for the whole program
+   run time (which is true for the VGA text buffer).
+
+Printing
+
+   Now we can use the Writer to modify the buffer's characters. First we
+   create a method to write a single ASCII byte:
+// in src/vga_buffer.rs
+
+impl Writer {
+    pub fn write_byte(&mut self, byte: u8) {
+        match byte {
+            b'\n' => self.new_line(),
+            byte => {
+                if self.column_position >= BUFFER_WIDTH {
+                    self.new_line();
+                }
+
+                let row = BUFFER_HEIGHT - 1;
+                let col = self.column_position;
+
+                let color_code = self.color_code;
+                self.buffer.chars[row][col] = ScreenChar {
+                    ascii_character: byte,
+                    color_code,
+                };
+                self.column_position += 1;
+            }
+        }
+    }
+
+    fn new_line(&mut self) {/* TODO */}
+}
+
+   If the byte is the [63]newline byte \n, the writer does not print
+   anything. Instead, it calls a new_line method, which we'll implement
+   later. Other bytes get printed to the screen in the second match case.
+
+   When printing a byte, the writer checks if the current line is full. In
+   that case, a new_line call is used to wrap the line. Then it writes a
+   new ScreenChar to the buffer at the current position. Finally, the
+   current column position is advanced.
+
+   To print whole strings, we can convert them to bytes and print them
+   one-by-one:
+// in src/vga_buffer.rs
+
+impl Writer {
+    pub fn write_string(&mut self, s: &str) {
+        for byte in s.bytes() {
+            match byte {
+                // printable ASCII byte or newline
+                0x20..=0x7e | b'\n' => self.write_byte(byte),
+                // not part of printable ASCII range
+                _ => self.write_byte(0xfe),
+            }
+
+        }
+    }
+}
+
+   The VGA text buffer only supports ASCII and the additional bytes of
+   [64]code page 437. Rust strings are [65]UTF-8 by default, so they might
+   contain bytes that are not supported by the VGA text buffer. We use a
+   match to differentiate printable ASCII bytes (a newline or anything in
+   between a space character and a ~ character) and unprintable bytes. For
+   unprintable bytes, we print a ■ character, which has the hex code 0xfe
+   on the VGA hardware.
+
+Try it out!
+
+   To write some characters to the screen, you can create a temporary
+   function:
+// in src/vga_buffer.rs
+
+pub fn print_something() {
+    let mut writer = Writer {
+        column_position: 0,
+        color_code: ColorCode::new(Color::Yellow, Color::Black),
+        buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+    };
+
+    writer.write_byte(b'H');
+    writer.write_string("ello ");
+    writer.write_string("Wörld!");
+}
+
+   It first creates a new Writer that points to the VGA buffer at 0xb8000.
+   The syntax for this might seem a bit strange: First, we cast the
+   integer 0xb8000 as a mutable [66]raw pointer. Then we convert it to a
+   mutable reference by dereferencing it (through *) and immediately
+   borrowing it again (through &mut). This conversion requires an
+   [67]unsafe block, since the compiler can't guarantee that the raw
+   pointer is valid.
+
+   Then it writes the byte b'H' to it. The b prefix creates a [68]byte
+   literal, which represents an ASCII character. By writing the strings
+   "ello " and "Wörld!", we test our write_string method and the handling
+   of unprintable characters. To see the output, we need to call the
+   print_something function from our _start function:
+// in src/main.rs
+
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    vga_buffer::print_something();
+
+    loop {}
+}
+
+   When we run our project now, a Hello W■■rld! should be printed in the
+   lower left corner of the screen in yellow:
+
+   QEMU output with a yellow Hello W■■rld! in the lower left corner
+
+   Notice that the ö is printed as two ■ characters. That's because ö is
+   represented by two bytes in [69]UTF-8, which both don't fall into the
+   printable ASCII range. In fact, this is a fundamental property of
+   UTF-8: the individual bytes of multi-byte values are never valid ASCII.
+
+Volatile
+
+   We just saw that our message was printed correctly. However, it might
+   not work with future Rust compilers that optimize more aggressively.
+
+   The problem is that we only write to the Buffer and never read from it
+   again. The compiler doesn't know that we really access VGA buffer
+   memory (instead of normal RAM) and knows nothing about the side effect
+   that some characters appear on the screen. So it might decide that
+   these writes are unnecessary and can be omitted. To avoid this
+   erroneous optimization, we need to specify these writes as
+   [70]volatile. This tells the compiler that the write has side effects
+   and should not be optimized away.
+
+   In order to use volatile writes for the VGA buffer, we use the
+   [71]volatile library. This crate (this is what packages are called in
+   the Rust world) provides a Volatile wrapper type with read and write
+   methods. These methods internally use the [72]read_volatile and
+   [73]write_volatile functions of the core library and thus guarantee
+   that the reads/writes are not optimized away.
+
+   We can add a dependency on the volatile crate by adding it to the
+   dependencies section of our Cargo.toml:
+# in Cargo.toml
+
+[dependencies]
+volatile = "0.2.6"
+
+   Make sure to specify volatile version 0.2.6. Newer versions of the
+   crate are not compatible with this post. 0.2.6 is the [74]semantic
+   version number. For more information, see the [75]Specifying
+   Dependencies guide of the cargo documentation.
+
+   Let's use it to make writes to the VGA buffer volatile. We update our
+   Buffer type as follows:
+// in src/vga_buffer.rs
+
+use volatile::Volatile;
+
+struct Buffer {
+    chars: [[Volatile<ScreenChar>; BUFFER_WIDTH]; BUFFER_HEIGHT],
+}
+
+   Instead of a ScreenChar, we're now using a Volatile<ScreenChar>. (The
+   Volatile type is [76]generic and can wrap (almost) any type). This
+   ensures that we can't accidentally write to it "normally". Instead, we
+   have to use the write method now.
+
+   This means that we have to update our Writer::write_byte method:
+// in src/vga_buffer.rs
+
+impl Writer {
+    pub fn write_byte(&mut self, byte: u8) {
+        match byte {
+            b'\n' => self.new_line(),
+            byte => {
+                ...
+
+                self.buffer.chars[row][col].write(ScreenChar {
+                    ascii_character: byte,
+                    color_code,
+                });
+                ...
+            }
+        }
+    }
+    ...
+}
+
+   Instead of a typical assignment using =, we're now using the write
+   method. Now we can guarantee that the compiler will never optimize away
+   this write.
+
+Formatting Macros
+
+   It would be nice to support Rust's formatting macros, too. That way, we
+   can easily print different types, like integers or floats. To support
+   them, we need to implement the [77]core::fmt::Write trait. The only
+   required method of this trait is write_str, which looks quite similar
+   to our write_string method, just with a fmt::Result return type:
+// in src/vga_buffer.rs
+
+use core::fmt;
+
+impl fmt::Write for Writer {
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        self.write_string(s);
+        Ok(())
+    }
+}
+
+   The Ok(()) is just an Ok Result containing the () type.
+
+   Now we can use Rust's built-in write!/writeln! formatting macros:
+// in src/vga_buffer.rs
+
+pub fn print_something() {
+    use core::fmt::Write;
+    let mut writer = Writer {
+        column_position: 0,
+        color_code: ColorCode::new(Color::Yellow, Color::Black),
+        buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+    };
+
+    writer.write_byte(b'H');
+    writer.write_string("ello! ");
+    write!(writer, "The numbers are {} and {}", 42, 1.0/3.0).unwrap();
+}
+
+   Now you should see a Hello! The numbers are 42 and 0.3333333333333333
+   at the bottom of the screen. The write! call returns a Result which
+   causes a warning if not used, so we call the [78]unwrap function on it,
+   which panics if an error occurs. This isn't a problem in our case,
+   since writes to the VGA buffer never fail.
+
+Newlines
+
+   Right now, we just ignore newlines and characters that don't fit into
+   the line anymore. Instead, we want to move every character one line up
+   (the top line gets deleted) and start at the beginning of the last line
+   again. To do this, we add an implementation for the new_line method of
+   Writer:
+// in src/vga_buffer.rs
+
+impl Writer {
+    fn new_line(&mut self) {
+        for row in 1..BUFFER_HEIGHT {
+            for col in 0..BUFFER_WIDTH {
+                let character = self.buffer.chars[row][col].read();
+                self.buffer.chars[row - 1][col].write(character);
+            }
+        }
+        self.clear_row(BUFFER_HEIGHT - 1);
+        self.column_position = 0;
+    }
+
+    fn clear_row(&mut self, row: usize) {/* TODO */}
+}
+
+   We iterate over all the screen characters and move each character one
+   row up. Note that the upper bound of the range notation (..) is
+   exclusive. We also omit the 0th row (the first range starts at 1)
+   because it's the row that is shifted off screen.
+
+   To finish the newline code, we add the clear_row method:
+// in src/vga_buffer.rs
+
+impl Writer {
+    fn clear_row(&mut self, row: usize) {
+        let blank = ScreenChar {
+            ascii_character: b' ',
+            color_code: self.color_code,
+        };
+        for col in 0..BUFFER_WIDTH {
+            self.buffer.chars[row][col].write(blank);
+        }
+    }
+}
+
+   This method clears a row by overwriting all of its characters with a
+   space character.
+
+A Global Interface
+
+   To provide a global writer that can be used as an interface from other
+   modules without carrying a Writer instance around, we try to create a
+   static WRITER:
+// in src/vga_buffer.rs
+
+pub static WRITER: Writer = Writer {
+    column_position: 0,
+    color_code: ColorCode::new(Color::Yellow, Color::Black),
+    buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+};
+
+   However, if we try to compile it now, the following errors occur:
+error[E0015]: calls in statics are limited to constant functions, tuple structs
+and tuple variants
+ --> src/vga_buffer.rs:7:17
+  |
+7 |     color_code: ColorCode::new(Color::Yellow, Color::Black),
+  |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error[E0396]: raw pointers cannot be dereferenced in statics
+ --> src/vga_buffer.rs:8:22
+  |
+8 |     buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+  |                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ dereference of raw point
+er in constant
+
+error[E0017]: references in statics may only refer to immutable values
+ --> src/vga_buffer.rs:8:22
+  |
+8 |     buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+  |                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ statics require immutabl
+e values
+
+error[E0017]: references in statics may only refer to immutable values
+ --> src/vga_buffer.rs:8:13
+  |
+8 |     buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+  |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ statics require immuta
+ble values
+
+   To understand what's happening here, we need to know that statics are
+   initialized at compile time, in contrast to normal variables that are
+   initialized at run time. The component of the Rust compiler that
+   evaluates such initialization expressions is called the "[79]const
+   evaluator". Its functionality is still limited, but there is ongoing
+   work to expand it, for example in the "[80]Allow panicking in
+   constants" RFC.
+
+   The issue with ColorCode::new would be solvable by using [81]const
+   functions, but the fundamental problem here is that Rust's const
+   evaluator is not able to convert raw pointers to references at compile
+   time. Maybe it will work someday, but until then, we have to find
+   another solution.
+
+Lazy Statics
+
+   The one-time initialization of statics with non-const functions is a
+   common problem in Rust. Fortunately, there already exists a good
+   solution in a crate named [82]lazy_static. This crate provides a
+   lazy_static! macro that defines a lazily initialized static. Instead of
+   computing its value at compile time, the static lazily initializes
+   itself when accessed for the first time. Thus, the initialization
+   happens at runtime, so arbitrarily complex initialization code is
+   possible.
+
+   Let's add the lazy_static crate to our project:
+# in Cargo.toml
+
+[dependencies.lazy_static]
+version = "1.0"
+features = ["spin_no_std"]
+
+   We need the spin_no_std feature, since we don't link the standard
+   library.
+
+   With lazy_static, we can define our static WRITER without problems:
+// in src/vga_buffer.rs
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+    pub static ref WRITER: Writer = Writer {
+        column_position: 0,
+        color_code: ColorCode::new(Color::Yellow, Color::Black),
+        buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+    };
+}
+
+   However, this WRITER is pretty useless since it is immutable. This
+   means that we can't write anything to it (since all the write methods
+   take &mut self). One possible solution would be to use a [83]mutable
+   static. But then every read and write to it would be unsafe since it
+   could easily introduce data races and other bad things. Using static
+   mut is highly discouraged. There were even proposals to [84]remove it.
+   But what are the alternatives? We could try to use an immutable static
+   with a cell type like [85]RefCell or even [86]UnsafeCell that provides
+   [87]interior mutability. But these types aren't [88]Sync (with good
+   reason), so we can't use them in statics.
+
+Spinlocks
+
+   To get synchronized interior mutability, users of the standard library
+   can use [89]Mutex. It provides mutual exclusion by blocking threads
+   when the resource is already locked. But our basic kernel does not have
+   any blocking support or even a concept of threads, so we can't use it
+   either. However, there is a really basic kind of mutex in computer
+   science that requires no operating system features: the [90]spinlock.
+   Instead of blocking, the threads simply try to lock it again and again
+   in a tight loop, thus burning CPU time until the mutex is free again.
+
+   To use a spinning mutex, we can add the [91]spin crate as a dependency:
+# in Cargo.toml
+[dependencies]
+spin = "0.5.2"
+
+   Then we can use the spinning mutex to add safe [92]interior mutability
+   to our static WRITER:
+// in src/vga_buffer.rs
+
+use spin::Mutex;
+...
+lazy_static! {
+    pub static ref WRITER: Mutex<Writer> = Mutex::new(Writer {
+        column_position: 0,
+        color_code: ColorCode::new(Color::Yellow, Color::Black),
+        buffer: unsafe { &mut *(0xb8000 as *mut Buffer) },
+    });
+}
+
+   Now we can delete the print_something function and print directly from
+   our _start function:
+// in src/main.rs
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    use core::fmt::Write;
+    vga_buffer::WRITER.lock().write_str("Hello again").unwrap();
+    write!(vga_buffer::WRITER.lock(), ", some numbers: {} {}", 42, 1.337).unwrap
+();
+
+    loop {}
+}
+
+   We need to import the fmt::Write trait in order to be able to use its
+   functions.
+
+Safety
+
+   Note that we only have a single unsafe block in our code, which is
+   needed to create a Buffer reference pointing to 0xb8000. Afterwards,
+   all operations are safe. Rust uses bounds checking for array accesses
+   by default, so we can't accidentally write outside the buffer. Thus, we
+   encoded the required conditions in the type system and are able to
+   provide a safe interface to the outside.
+
+A println Macro
+
+   Now that we have a global writer, we can add a println macro that can
+   be used from anywhere in the codebase. Rust's [93]macro syntax is a bit
+   strange, so we won't try to write a macro from scratch. Instead, we
+   look at the source of the [94]println! macro in the standard library:
+#[macro_export]
+macro_rules! println {
+    () => (print!("\n"));
+    ($($arg:tt)*) => (print!("{}\n", format_args!($($arg)*)));
+}
+
+   Macros are defined through one or more rules, similar to match arms.
+   The println macro has two rules: The first rule is for invocations
+   without arguments, e.g., println!(), which is expanded to print!("\n")
+   and thus just prints a newline. The second rule is for invocations with
+   parameters such as println!("Hello") or println!("Number: {}", 4). It
+   is also expanded to an invocation of the print! macro, passing all
+   arguments and an additional newline \n at the end.
+
+   The #[macro_export] attribute makes the macro available to the whole
+   crate (not just the module it is defined in) and external crates. It
+   also places the macro at the crate root, which means we have to import
+   the macro through use std::println instead of std::macros::println.
+
+   The [95]print! macro is defined as:
+#[macro_export]
+macro_rules! print {
+    ($($arg:tt)*) => ($crate::io::_print(format_args!($($arg)*)));
+}
+
+   The macro expands to a call of the [96]_print function in the io
+   module. The [97]$crate variable ensures that the macro also works from
+   outside the std crate by expanding to std when it's used in other
+   crates.
+
+   The [98]format_args macro builds a [99]fmt::Arguments type from the
+   passed arguments, which is passed to _print. The [100]_print function
+   of libstd calls print_to, which is rather complicated because it
+   supports different Stdout devices. We don't need that complexity since
+   we just want to print to the VGA buffer.
+
+   To print to the VGA buffer, we just copy the println! and print!
+   macros, but modify them to use our own _print function:
+// in src/vga_buffer.rs
+
+#[macro_export]
+macro_rules! print {
+    ($($arg:tt)*) => ($crate::vga_buffer::_print(format_args!($($arg)*)));
+}
+
+#[macro_export]
+macro_rules! println {
+    () => ($crate::print!("\n"));
+    ($($arg:tt)*) => ($crate::print!("{}\n", format_args!($($arg)*)));
+}
+
+#[doc(hidden)]
+pub fn _print(args: fmt::Arguments) {
+    use core::fmt::Write;
+    WRITER.lock().write_fmt(args).unwrap();
+}
+
+   One thing that we changed from the original println definition is that
+   we prefixed the invocations of the print! macro with $crate too. This
+   ensures that we don't need to import the print! macro too if we only
+   want to use println.
+
+   Like in the standard library, we add the #[macro_export] attribute to
+   both macros to make them available everywhere in our crate. Note that
+   this places the macros in the root namespace of the crate, so importing
+   them via use crate::vga_buffer::println does not work. Instead, we have
+   to do use crate::println.
+
+   The _print function locks our static WRITER and calls the write_fmt
+   method on it. This method is from the Write trait, which we need to
+   import. The additional unwrap() at the end panics if printing isn't
+   successful. But since we always return Ok in write_str, that should not
+   happen.
+
+   Since the macros need to be able to call _print from outside of the
+   module, the function needs to be public. However, since we consider
+   this a private implementation detail, we add the [101]doc(hidden)
+   attribute to hide it from the generated documentation.
+
+Hello World using println
+
+   Now we can use println in our _start function:
+// in src/main.rs
+
+#[no_mangle]
+pub extern "C" fn _start() {
+    println!("Hello World{}", "!");
+
+    loop {}
+}
+
+   Note that we don't have to import the macro in the main function,
+   because it already lives in the root namespace.
+
+   As expected, we now see a "Hello World!" on the screen:
+
+   QEMU printing "Hello World!"
+
+Printing Panic Messages
+
+   Now that we have a println macro, we can use it in our panic function
+   to print the panic message and the location of the panic:
+// in main.rs
+
+/// This function is called on panic.
+#[panic_handler]
+fn panic(info: &PanicInfo) -> ! {
+    println!("{}", info);
+    loop {}
+}
+
+   When we now insert panic!("Some panic message"); in our _start
+   function, we get the following output:
+
+   QEMU printing "panicked at `Some panic message', src/main.rs:28:5"
+
+   So we know not only that a panic has occurred, but also the panic
+   message and where in the code it happened.
+
+Summary
+
+   In this post, we learned about the structure of the VGA text buffer and
+   how it can be written through the memory mapping at address 0xb8000. We
+   created a Rust module that encapsulates the unsafety of writing to this
+   memory-mapped buffer and presents a safe and convenient interface to
+   the outside.
+
+   Thanks to cargo, we also saw how easy it is to add dependencies on
+   third-party libraries. The two dependencies that we added, lazy_static
+   and spin, are very useful in OS development and we will use them in
+   more places in future posts.
+
+What's next?
+
+   The next post explains how to set up Rust's built-in unit test
+   framework. We will then create some basic unit tests for the VGA buffer
+   module from this post.
+
+Support Me
+
+   Creating and [102]maintaining this blog and the associated libraries is
+   a lot of work, but I really enjoy doing it. By supporting me, you allow
+   me to invest more time in new content, new features, and continuous
+   maintenance.
+
+   The best way to support me is to [103]sponsor me on GitHub, since they
+   don't charge any fees. If you prefer other platforms, I also have
+   [104]Patreon and [105]Donorbox accounts. The latter is the most
+   flexible as it supports multiple currencies and one-time contributions.
+
+   Thank you!
+     __________________________________________________________________
+
+   [106]« A Minimal Rust Kernel [107]Testing »
+     __________________________________________________________________
+
+Comments
+
+   Do you have a problem, want to share feedback, or discuss further
+   ideas? Feel free to leave a comment here! Please stick to English and
+   follow Rust's [108]code of conduct. This comment thread directly maps
+   to a [109]discussion on GitHub, so you can also comment there if you
+   prefer.
+
+   Instead of authenticating the [110]giscus application, you can also
+   comment directly [111]on GitHub.
+
+Other Languages
+
+     * [112]Chinese (simplified)
+     * [113]Japanese
+     * [114]Persian
+     * [115]Korean
+     __________________________________________________________________
+
+   © 2022. All rights reserved. [116]License [117]Contact
+
+References
+
+   Visible links:
+   1. https://os.phil-opp.com/rss.xml
+   2. https://os.phil-opp.com/
+   3. https://os.phil-opp.com/
+   4. https://os.phil-opp.com/vga-text-mode/#the-vga-text-buffer
+   5. https://os.phil-opp.com/vga-text-mode/#a-rust-module
+   6. https://os.phil-opp.com/vga-text-mode/#colors
+   7. https://os.phil-opp.com/vga-text-mode/#text-buffer
+   8. https://os.phil-opp.com/vga-text-mode/#printing
+   9. https://os.phil-opp.com/vga-text-mode/#volatile
+  10. https://os.phil-opp.com/vga-text-mode/#formatting-macros
+  11. https://os.phil-opp.com/vga-text-mode/#newlines
+  12. https://os.phil-opp.com/vga-text-mode/#a-global-interface
+  13. https://os.phil-opp.com/vga-text-mode/#lazy-statics
+  14. https://os.phil-opp.com/vga-text-mode/#spinlocks
+  15. https://os.phil-opp.com/vga-text-mode/#safety
+  16. https://os.phil-opp.com/vga-text-mode/#a-println-macro
+  17. https://os.phil-opp.com/vga-text-mode/#hello-world-using-println
+  18. https://os.phil-opp.com/vga-text-mode/#printing-panic-messages
+  19. https://os.phil-opp.com/vga-text-mode/#summary
+  20. https://os.phil-opp.com/vga-text-mode/#what-s-next
+  21. https://os.phil-opp.com/vga-text-mode/#comments
+  22. https://en.wikipedia.org/wiki/VGA-compatible_text_mode
+  23. https://doc.rust-lang.org/std/fmt/#related-macros
+  24. https://github.com/phil-opp/blog_os
+  25. https://os.phil-opp.com/vga-text-mode/#comments
+  26. https://github.com/phil-opp/blog_os/tree/post-03
+  27. https://os.phil-opp.com/vga-text-mode/#the-vga-text-buffer
+  28. https://os.phil-opp.com/vga-text-mode/#a-rust-module
+  29. https://os.phil-opp.com/vga-text-mode/#colors
+  30. https://os.phil-opp.com/vga-text-mode/#text-buffer
+  31. https://os.phil-opp.com/vga-text-mode/#printing
+  32. https://os.phil-opp.com/vga-text-mode/#volatile
+  33. https://os.phil-opp.com/vga-text-mode/#formatting-macros
+  34. https://os.phil-opp.com/vga-text-mode/#newlines
+  35. https://os.phil-opp.com/vga-text-mode/#a-global-interface
+  36. https://os.phil-opp.com/vga-text-mode/#lazy-statics
+  37. https://os.phil-opp.com/vga-text-mode/#spinlocks
+  38. https://os.phil-opp.com/vga-text-mode/#safety
+  39. https://os.phil-opp.com/vga-text-mode/#a-println-macro
+  40. https://os.phil-opp.com/vga-text-mode/#hello-world-using-println
+  41. https://os.phil-opp.com/vga-text-mode/#printing-panic-messages
+  42. https://os.phil-opp.com/vga-text-mode/#summary
+  43. https://os.phil-opp.com/vga-text-mode/#what-s-next
+  44. https://os.phil-opp.com/vga-text-mode/#comments
+  45. https://en.wikipedia.org/wiki/ASCII
+  46. https://en.wikipedia.org/wiki/Code_page_437
+  47. https://en.wikipedia.org/wiki/Memory-mapped_I/O
+  48. https://web.stanford.edu/class/cs140/projects/pintos/specs/freevga/vga/vgamem.htm#manip
+  49. https://doc.rust-lang.org/rust-by-example/custom_types/enum/c_like.html
+  50. https://doc.rust-lang.org/rust-by-example/trait/derive.html
+  51. https://doc.rust-lang.org/nightly/core/marker/trait.Copy.html
+  52. https://doc.rust-lang.org/nightly/core/clone/trait.Clone.html
+  53. https://doc.rust-lang.org/nightly/core/fmt/trait.Debug.html
+  54. https://doc.rust-lang.org/nightly/core/cmp/trait.PartialEq.html
+  55. https://doc.rust-lang.org/nightly/core/cmp/trait.Eq.html
+  56. https://doc.rust-lang.org/1.30.0/book/first-edition/ownership.html#copy-types
+  57. https://doc.rust-lang.org/rust-by-example/generics/new_types.html
+  58. https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent
+  59. https://doc.rust-lang.org/nightly/nomicon/other-reprs.html#reprc
+  60. https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent
+  61. https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#lifetime-annotation-syntax
+  62. https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#the-static-lifetime
+  63. https://en.wikipedia.org/wiki/Newline
+  64. https://en.wikipedia.org/wiki/Code_page_437
+  65. https://www.fileformat.info/info/unicode/utf8.htm
+  66. https://doc.rust-lang.org/book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
+  67. https://doc.rust-lang.org/book/ch19-01-unsafe-rust.html
+  68. https://doc.rust-lang.org/reference/tokens.html#byte-literals
+  69. https://www.fileformat.info/info/unicode/utf8.htm
+  70. https://en.wikipedia.org/wiki/Volatile_(computer_programming)
+  71. https://docs.rs/volatile
+  72. https://doc.rust-lang.org/nightly/core/ptr/fn.read_volatile.html
+  73. https://doc.rust-lang.org/nightly/core/ptr/fn.write_volatile.html
+  74. https://semver.org/
+  75. https://doc.crates.io/specifying-dependencies.html
+  76. https://doc.rust-lang.org/book/ch10-01-syntax.html
+  77. https://doc.rust-lang.org/nightly/core/fmt/trait.Write.html
+  78. https://doc.rust-lang.org/core/result/enum.Result.html#method.unwrap
+  79. https://rustc-dev-guide.rust-lang.org/const-eval.html
+  80. https://github.com/rust-lang/rfcs/pull/2345
+  81. https://doc.rust-lang.org/reference/const_eval.html#const-functions
+  82. https://docs.rs/lazy_static/1.0.1/lazy_static/
+  83. https://doc.rust-lang.org/book/ch19-01-unsafe-rust.html#accessing-or-modifying-a-mutable-static-variable
+  84. https://internals.rust-lang.org/t/pre-rfc-remove-static-mut/1437
+  85. https://doc.rust-lang.org/book/ch15-05-interior-mutability.html#keeping-track-of-borrows-at-runtime-with-refcellt
+  86. https://doc.rust-lang.org/nightly/core/cell/struct.UnsafeCell.html
+  87. https://doc.rust-lang.org/book/ch15-05-interior-mutability.html
+  88. https://doc.rust-lang.org/nightly/core/marker/trait.Sync.html
+  89. https://doc.rust-lang.org/nightly/std/sync/struct.Mutex.html
+  90. https://en.wikipedia.org/wiki/Spinlock
+  91. https://crates.io/crates/spin
+  92. https://doc.rust-lang.org/book/ch15-05-interior-mutability.html
+  93. https://doc.rust-lang.org/nightly/book/ch19-06-macros.html#declarative-macros-with-macro_rules-for-general-metaprogramming
+  94. https://doc.rust-lang.org/nightly/std/macro.println!.html
+  95. https://doc.rust-lang.org/nightly/std/macro.print!.html
+  96. https://github.com/rust-lang/rust/blob/29f5c699b11a6a148f097f82eaa05202f8799bbc/src/libstd/io/stdio.rs#L698
+  97. https://doc.rust-lang.org/1.30.0/book/first-edition/macros.html#the-variable-crate
+  98. https://doc.rust-lang.org/nightly/std/macro.format_args.html
+  99. https://doc.rust-lang.org/nightly/core/fmt/struct.Arguments.html
+ 100. https://github.com/rust-lang/rust/blob/29f5c699b11a6a148f097f82eaa05202f8799bbc/src/libstd/io/stdio.rs#L698
+ 101. https://doc.rust-lang.org/nightly/rustdoc/write-documentation/the-doc-attribute.html#hidden
+ 102. https://os.phil-opp.com/status-update/
+ 103. https://github.com/sponsors/phil-opp
+ 104. https://www.patreon.com/phil_opp
+ 105. https://donorbox.org/phil-opp
+ 106. https://os.phil-opp.com/minimal-rust-kernel/
+ 107. https://os.phil-opp.com/testing/
+ 108. https://www.rust-lang.org/policies/code-of-conduct
+ 109. https://github.com/phil-opp/blog_os/discussions/categories/post-comments?discussions_q=%22VGA%20Text%20Mode%22%20in%3Atitle
+ 110. https://giscus.app/
+ 111. https://github.com/phil-opp/blog_os/discussions/categories/post-comments?discussions_q=%22VGA%20Text%20Mode%22%20in%3Atitle
+ 112. https://os.phil-opp.com/zh-CN/vga-text-mode/
+ 113. https://os.phil-opp.com/ja/vga-text-mode/
+ 114. https://os.phil-opp.com/fa/vga-text-mode/
+ 115. https://os.phil-opp.com/ko/vga-text-mode/
+ 116. https://github.com/phil-opp/blog_os#license
+ 117. https://os.phil-opp.com/contact/
+
+   Hidden links:
+ 119. https://os.phil-opp.com/vga-text-mode/#the-vga-text-buffer
+ 120. https://os.phil-opp.com/vga-text-mode/#a-rust-module
+ 121. https://os.phil-opp.com/vga-text-mode/#colors
+ 122. https://os.phil-opp.com/vga-text-mode/#text-buffer
+ 123. https://os.phil-opp.com/vga-text-mode/#printing
+ 124. https://os.phil-opp.com/vga-text-mode/#try-it-out
+ 125. https://os.phil-opp.com/vga-text-mode/#volatile
+ 126. https://os.phil-opp.com/vga-text-mode/#formatting-macros
+ 127. https://os.phil-opp.com/vga-text-mode/#newlines
+ 128. https://os.phil-opp.com/vga-text-mode/#a-global-interface
+ 129. https://os.phil-opp.com/vga-text-mode/#lazy-statics
+ 130. https://os.phil-opp.com/vga-text-mode/#spinlocks
+ 131. https://os.phil-opp.com/vga-text-mode/#safety
+ 132. https://os.phil-opp.com/vga-text-mode/#a-println-macro
+ 133. https://os.phil-opp.com/vga-text-mode/#hello-world-using-println
+ 134. https://os.phil-opp.com/vga-text-mode/#printing-panic-messages
+ 135. https://os.phil-opp.com/vga-text-mode/#summary
+ 136. https://os.phil-opp.com/vga-text-mode/#what-s-next
diff --git a/doc/web.yl.is.s.u-tokyo.ac.jp_~tosh_talk.txt b/doc/web.yl.is.s.u-tokyo.ac.jp_~tosh_talk.txt
new file mode 100644
index 0000000..3e7cde5
--- /dev/null
+++ b/doc/web.yl.is.s.u-tokyo.ac.jp_~tosh_talk.txt
@@ -0,0 +1,58 @@
+                   TALK : Typed Assembly Language for Kernel
+
+   TALK is a typed assembly language which is flexible and expressive
+   enough to implement important OS facilities, such as memory management
+   (e.g., malloc/free) and multi-thread management.
+   [1]TALK for IA-32
+   (last updated 2005-10-27 : version 0.0.0.2. [2]ChangeLog. [3]Old
+   versions)
+   TALK is licensed under the [4]GNU General Public License.
+     __________________________________________________________________
+
+    Notes
+
+   The "assembler" just emits binary executables annotated with type
+   information. It does not perform type-checks.
+   The type-check is performed by the "verifier".
+     __________________________________________________________________
+
+Documents
+
+   We are sorry, but we have not yet prepared a user manual for TALK. (For
+   now, please see the programs in [5]TOS.)
+
+  Further Readings
+
+    1. Toshiyuki Maeda and Akinori Yonezawa.
+       [6]Writing practical memory management code with a strictly typed
+       assembly language.
+       In 3rd Workshop on Semantics, Program Analysis, and Computing
+       Environments for Memory Management (SPACE 2006). Informal
+       proceedings.
+    2. Toshiyuki Maeda.
+       [7]Ph.D. Thesis (2006)
+    3. Toshiyuki Maeda and Akinori Yonezawa.
+       [8]Typed Assembly Language for SMP/Multi-core CPUs (Japanese).
+       (2008)
+       In 6th Dependable System Workshop (DSW 2008).
+     __________________________________________________________________
+
+Contact Information
+
+   [9]Toshiyuki Maeda
+   tosh @ is.s.u-tokyo.ac.jp
+
+                                                                   [10]../
+
+References
+
+   1. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/talk/prototype/talk-0.0.0.2.tar.gz
+   2. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/talk/prototype/ChangeLog
+   3. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/talk/prototype/
+   4. http://www.gnu.org/copyleft/gpl.html
+   5. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/tos/
+   6. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/talk.pdf
+   7. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/thesis.pdf
+   8. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/talk/tosh-dsw2008.pdf
+   9. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/
+  10. http://web.yl.is.s.u-tokyo.ac.jp/~tosh/
diff --git a/doc/www.independent-software.com_operating-system-development-jumping-to-protected-mode.txt b/doc/www.independent-software.com_operating-system-development-jumping-to-protected-mode.txt
new file mode 100644
index 0000000..9098d44
--- /dev/null
+++ b/doc/www.independent-software.com_operating-system-development-jumping-to-protected-mode.txt
@@ -0,0 +1,272 @@
+    #[1]Independent Software
+
+   [2][independent-software-logo.svg] [ ]
+   [3]About [4]Philosophy [5]Web Development [6]Portfolio [7]Blog
+
+   [8]osdev [9]asm
+
+   OPERATING SYSTEM DEVELOPMENT
+   JUMPING TO PROTECTED MODE
+
+   -
+
+   Oct 24, 2013
+
+   In the [10]previous section of this tutorial for writing your own toy
+   operating system, we discussed memory and focused on the 21st address
+   line (the "A20 line") that must be enabled before we can have access to
+   the full 4GB of memory, which is a prerequisite to entering protected
+   mode. Now it's time to jump to protected mode.
+
+   In fact, all we've done in the last few articles is prepare for
+   entering protected mode. We've set up a global descriptor table (GDT),
+   an interrupt descriptor table (IDT) and enabled the A20 line. All that
+   remains is actually jumping to protected mode where we'll finally be
+   able to execute 32-bit code so we can focus on our kernel.
+
+   This article is part of a series on toy operating system development.
+
+   [11]View the series index
+   [INS: :INS]
+
+   In the [12]previous section of this tutorial for writing your own toy
+   operating system, we discussed memory and focused on the 21st address
+   line (the "A20 line") that must be enabled before we can have access to
+   the full 4GB of memory, which is a prerequisite to entering protected
+   mode. Now it's time to jump to protected mode.
+
+   In fact, all we've done in the last few articles is prepare for
+   entering protected mode. We've set up a global descriptor table (GDT),
+   an interrupt descriptor table (IDT) and enabled the A20 line. All that
+   remains is actually jumping to protected mode where we'll finally be
+   able to execute 32-bit code so we can focus on our kernel.
+
+   This article is part of a series on toy operating system development.
+
+   [13]View the series index
+
+Control registers
+
+   What we've seen so far while rolling our own first-stage and
+   second-stage boot loaders, is familiar processor registers: AX, BX, CX,
+   DX, segments like CS, DS, ES, SS, the instruction pointer IP and the
+   stack pointer SP. The 80386+ processors actually introduce some new
+   registers that will become important when we switch to 32-bit
+   programming.
+
+   For one thing, existing registers get wider. Where we used to have
+   access to AX (16 bits wide), we will soon have access to EAX (32-bits
+   wide), as well as EBX, ECX and EDX. We'll gain additional segment
+   registers as well (FS and GS). Similarly, the instruction pointer
+   becomes EIP (32 bits again) and so on. That's great, and requires no
+   great deal of explanation.
+
+   However, we gain other registers as well. The Intel 80386 processor
+   comes armed with a set of control registers, and we'll need one of them
+   to switch to protected mode so we might as well talk about it now.
+   These control registers change or control the behavior of the CPU. This
+   includes interrupt control, switching addressing mode, paging and
+   coprocessor control. The new registers are called CR0, CR1, CR2, CR3
+   and CR4.
+
+   The first control register, CR0, has various control flags that modify
+   the basic operation of the processor.
+   Bit Name Full name Description
+   31 PG Paging If 1, enable paging and use the CR3 register, else disable
+   paging
+   30 CD Cache disable Globally enables/disable the memory cache
+   29 NW Not-write through Globally enables/disable write-back caching
+   18 AM Alignment mask Alignment check enabled if AM set, AC flag (in
+   EFLAGS register) set, and privilege level is 3
+   16 WP Write protect Determines whether the CPU can write to pages
+   marked read-only
+   5 NE Numeric error Enable internal x87 floating point error reporting
+   when set, else enables PC style x87 error detection
+   4 ET Extension type On the 386, it allowed specifying whether the
+   external math coprocessor was an 80287 or 80387
+   3 TS Task switched Allows saving x87 task context upon a task switch
+   only after x87 instruction used
+   2 EM Emulation If set, no x87 floating point unit present, if clear,
+   x87 FPU present
+   1 MP Monitor co-processor Controls interaction of WAIT/FWAIT
+   instructions with TS flag in CR0
+   0 PE Protected mode enable If 1, system is in protected mode, else
+   system is in real mode
+
+   Some of these bits will become important for us later on, but for now,
+   we're interested in the very first bit: the PE bit. It enables
+   protected mode.
+
+Switching to protected mode
+
+   In order to make the switch to protected mode, all we have to do is
+   enable the PE-bit in the CR0 register, like so:
+.macro mGoProtected
+  mov    eax, cr0
+  or     eax, 1
+  mov    cr0, eax
+.endm
+
+Clearing the prefetch queue
+
+   By setting the PE bit in the CR0 register, we have just switched to
+   protected mode. This means that all instructions are now in 32-bit
+   format. As a result, some of them are encoded differently. Some
+   instructions may take up more bytes in their binary form, some others
+   maybe less, and others still remain unchanged. At any rate, we can't
+   continue executing any more code just yet, because of the prefetch
+   queue.
+
+   You see, CPUs are built to be fast. One of the tricks of the trade that
+   make CPUs ever faster is to have the CPU load a range of instructions
+   from memory to be executed at the same time, rather than just one. This
+   is called prefetching. After all, the CPU in the Intel 80386 processor
+   can read 4 bytes (32 bits) at the same time from memory, and that might
+   well be more than one instruction. For technical reasons, even more
+   might be read and decoded before it's actually executed by the CPU.
+
+   The consequence of this is that the CPU may have read some instructions
+   from memory when it was still in 16-bits mode, decoded them, and is now
+   ready to execute them. They won't work, because the processor is now in
+   protected 32-bits mode!
+
+   Luckily, there is a trick to make the processor discard the instructions
+   it has already prefetched, and that trick is jumping. Whenever the
+   processor encounters a jump instruction, any instructions it had read
+   past that instruction become worthless and must be discarded.
+   Consequently, jumping clears the prefetch queue:
+.macro mClearPrefetchQueue
+    jmp clear_prefetch_queue:
+    nop
+    nop
+  clear_prefetch_queue:
+.endm
+
+   There are some nop instructions after the jump, to make doubly sure
+   that the prefetch queue is fully emptied.
+
+Setting up the 80386's registers
+
+   We've talked the talk, now the time has come to walk the walk. Next,
+   we'll set up the memory segments that our future kernel code will use.
+   This is no longer done by putting in memory addresses, but by
+   specifying selector numbers. We'll set all our data segments (ds, es,
+   fs and gs) as well as the stack segment (ss) to use selector 2 from the
+   global descriptor table, which corresponds to the data segment that we
+   had defined in our GDT:
+.macro mSetup386Segments
+    mov    ax, 0x10      # Byte offset for selector 2
+    mov    ds, ax        # (remember, each descriptor is 8 bytes)
+    mov    es, ax
+    mov    fs, ax
+    mov    gs, ax
+    mov    ss, ax
+    mov    esp, 0x2ffff  # Set stack to grown downwards from 0x30000
+.endm
+
+Jumping to the kernel
+
+   Yes! Assuming that our second-stage boot loader had previously loaded
+   our kernel image into memory at linear address 0x20000 (using the same
+   FAT reading and file reading routines we had already developed for the
+   first-stage bootloader), we can now jump to it and start executing it.
+
+   The jump to the kernel must be done with a 32-bit long jump
+   instruction. Here we face a small snag. All the code in our
+   second-stage boot loader is 16-bit code, because that's the way it's
+   compiled. Therefore, we cannot actually specify a 32-bit long jump; it
+   will get compiled as a 16-bit jump. To get around this, we'll encode
+   the long jump instruction ourselves just like a 32-bit assembler would
+   do it.
+
+   Our long jump instruction will jump to linear memory address 0x20000,
+   in the first selector of the GDT (our code segment), which has offset
+   0x8:
+.macro mJumpToKernel
+  .byte 0x66
+  .byte 0xEA
+  .int  0x20000            # offset
+  .word 0x0008             # selector word
+.endm
+
+   This will transfer control to the kernel code, which we have yet to
+   write. If you're feeling adventurous, why not write a small 32-bit
+   assembly program that places the value 0x41 at linear address 0xb8000?
+   That will show the letter "A" at the top-left corner of the screen and
+   can be executed in protected mode (you can't use the BIOS interrupts to
+   write to the screen anymore).
+
+   Actually, we'll do that in the next part of this tutorial anyway!
+
+Summary
+
+   Whew! It's been quite a trip, but we have now reached protected mode
+   and are ready to write a simple kernel. At least at this point, all of
+   the machine's memory and protected mode features will be at our
+   disposal.
+
+   In this tutorial, we've wrapped up the final bits necessary to enter
+   protected mode:
+     * We've enabled the PE-bit in the CR0 register, thus switching to
+       protected mode
+     * We've cleared the prefetch queue so that no 16-bit instructions
+       remained in the CPU which can no longer be executed
+     * We've set up the registers for use by the 32-bit kernel program
+     * We've executed a long jump to the kernel code
+
+   [14]Continue on to the next part of this guide!
+   [INS: :INS]
+   Please enable JavaScript to view the [15]comments powered by Disqus.
+
+   [independent-software-logo.svg]
+     * 1513 Av. Vladimir Lenine
+     * Maputo, Mozambique
+     * [16]info@independent-software.com
+     * +258 82 304 26 35
+
+Links
+
+     * [17]Web Development
+     * [18]Portfolio
+     * [19]Operating System Development
+
+Social
+
+     * [20]Facebook
+     * [21]Github
+     * [22]LinkedIn
+     * [23]Twitter
+     * [24]Google+
+     * [25]Atom feed
+
+References
+
+   Visible links:
+   1. http://www.independent-software.com/feed.xml
+   2. http://www.independent-software.com/
+   3. http://www.independent-software.com/about-independent-software.html
+   4. http://www.independent-software.com/philosophy.html
+   5. http://www.independent-software.com/web-development.html
+   6. http://www.independent-software.com/portfolio.html
+   7. http://www.independent-software.com/blog.html
+   8. http://www.independent-software.com/category/osdev.html
+   9. http://www.independent-software.com/category/asm.html
+  10. http://www.independent-software.com/operating-system-development-enabling-a20-line.html
+  11. http://www.independent-software.com/operating-system-development.html
+  12. http://www.independent-software.com/operating-system-development-enabling-a20-line.html
+  13. http://www.independent-software.com/operating-system-development.html
+  14. http://www.independent-software.com/operating-system-development-first-and-second-stage-bootloaders.html
+  15. https://disqus.com/?ref_noscript
+  16. mailto:info@independent-software.com
+  17. http://www.independent-software.com/web-development.html
+  18. http://www.independent-software.com/portfolio.html
+  19. http://www.independent-software.com/operating-system-development.html
+  20. https://www.facebook.com/Independent-Software-295360497495620/
+  21. https://github.com/henck
+  22. https://www.linkedin.com/company/independent-software-mozambique-
+  23. https://twitter.com/IndependentSw
+  24. https://google.com/+Independent-software
+  25. http://www.independent-software.com/feed.xml
+
+   Hidden links:
+  27. http://www.independent-software.com/operating-system-development-jumping-to-protected-mode.html
diff --git a/doc/www.jwz.org_doc_worse-is-better.txt b/doc/www.jwz.org_doc_worse-is-better.txt
new file mode 100644
index 0000000..4cdcad1
--- /dev/null
+++ b/doc/www.jwz.org_doc_worse-is-better.txt
@@ -0,0 +1,200 @@
+   Previous: [1]Lisp's Apparent Failures Up: [2]Lisp's Apparent Failures
+   Next: [3]Good Lisp Programming is Hard
+
+                       The Rise of ``Worse is Better''
+                             By Richard Gabriel
+
+   I and just about every designer of Common Lisp and CLOS has had extreme
+   exposure to the MIT/Stanford style of design. The essence of this style
+   can be captured by the phrase ``the right thing.'' To such a designer
+   it is important to get all of the following characteristics right:
+
+     * Simplicity-the design must be simple, both in implementation and
+       interface. It is more important for the interface to be simple than
+       the implementation.
+     * Correctness-the design must be correct in all observable aspects.
+       Incorrectness is simply not allowed.
+     * Consistency-the design must not be inconsistent. A design is
+       allowed to be slightly less simple and less complete to avoid
+       inconsistency. Consistency is as important as correctness.
+     * Completeness-the design must cover as many important situations as
+       is practical. All reasonably expected cases must be covered.
+       Simplicity is not allowed to overly reduce completeness.
+
+   I believe most people would agree that these are good characteristics.
+   I will call the use of this philosophy of design the ``MIT approach.''
+   Common Lisp (with CLOS) and Scheme represent the MIT approach to design
+   and implementation.
+
+   The worse-is-better philosophy is only slightly different:
+
+     * Simplicity-the design must be simple, both in implementation and
+       interface. It is more important for the implementation to be simple
+       than the interface. Simplicity is the most important consideration
+       in a design.
+     * Correctness-the design must be correct in all observable aspects.
+       It is slightly better to be simple than correct.
+     * Consistency-the design must not be overly inconsistent. Consistency
+       can be sacrificed for simplicity in some cases, but it is better to
+       drop those parts of the design that deal with less common
+       circumstances than to introduce either implementational complexity
+       or inconsistency.
+     * Completeness-the design must cover as many important situations as
+       is practical. All reasonably expected cases should be covered.
+       Completeness can be sacrificed in favor of any other quality. In
+       fact, completeness must be sacrificed whenever implementation
+       simplicity is jeopardized. Consistency can be sacrificed to achieve
+       completeness if simplicity is retained; especially worthless is
+       consistency of interface.
+
+   Early Unix and C are examples of the use of this school of design, and
+   I will call the use of this design strategy the ``New Jersey
+   approach.'' I have intentionally caricatured the worse-is-better
+   philosophy to convince you that it is obviously a bad philosophy and
+   that the New Jersey approach is a bad approach.
+
+   However, I believe that worse-is-better, even in its strawman form, has
+   better survival characteristics than the-right-thing, and that the New
+   Jersey approach when used for software is a better approach than the
+   MIT approach.
+
+   Let me start out by retelling a story that shows that the
+   MIT/New-Jersey distinction is valid and that proponents of each
+   philosophy actually believe their philosophy is better.
+
+   Two famous people, one from MIT and another from Berkeley (but working
+   on Unix) once met to discuss operating system issues. The person from
+   MIT was knowledgeable about ITS (the MIT AI Lab operating system) and
+   had been reading the Unix sources. He was interested in how Unix solved
+   the PC loser-ing problem. The PC loser-ing problem occurs when a user
+   program invokes a system routine to perform a lengthy operation that
+   might have significant state, such as IO buffers. If an interrupt
+   occurs during the operation, the state of the user program must be
+   saved. Because the invocation of the system routine is usually a single
+   instruction, the PC of the user program does not adequately capture the
+   state of the process. The system routine must either back out or press
+   forward. The right thing is to back out and restore the user program PC
+   to the instruction that invoked the system routine so that resumption
+   of the user program after the interrupt, for example, re-enters the
+   system routine. It is called ``PC loser-ing'' because the PC is being
+   coerced into ``loser mode,'' where ``loser'' is the affectionate name
+   for ``user'' at MIT.
+
+   The MIT guy did not see any code that handled this case and asked the
+   New Jersey guy how the problem was handled. The New Jersey guy said
+   that the Unix folks were aware of the problem, but the solution was for
+   the system routine to always finish, but sometimes an error code would
+   be returned that signaled that the system routine had failed to
+   complete its action. A correct user program, then, had to check the
+   error code to determine whether to simply try the system routine again.
+   The MIT guy did not like this solution because it was not the right
+   thing.
+
+   The New Jersey guy said that the Unix solution was right because the
+   design philosophy of Unix was simplicity and that the right thing was
+   too complex. Besides, programmers could easily insert this extra test
+   and loop. The MIT guy pointed out that the implementation was simple
+   but the interface to the functionality was complex. The New Jersey guy
+   said that the right tradeoff has been selected in Unix-namely,
+   implementation simplicity was more important than interface simplicity.
+
+   The MIT guy then muttered that sometimes it takes a tough man to make a
+   tender chicken, but the New Jersey guy didn't understand (I'm not sure
+   I do either).
+
+   Now I want to argue that worse-is-better is better. C is a programming
+   language designed for writing Unix, and it was designed using the New
+   Jersey approach. C is therefore a language for which it is easy to
+   write a decent compiler, and it requires the programmer to write text
+   that is easy for the compiler to interpret. Some have called C a fancy
+   assembly language. Both early Unix and C compilers had simple
+   structures, are easy to port, require few machine resources to run, and
+   provide about 50%--80% of what you want from an operating system and
+   programming language.
+
+   Half the computers that exist at any point are worse than median
+   (smaller or slower). Unix and C work fine on them. The worse-is-better
+   philosophy means that implementation simplicity has highest priority,
+   which means Unix and C are easy to port on such machines. Therefore,
+   one expects that if the 50% functionality Unix and C support is
+   satisfactory, they will start to appear everywhere. And they have,
+   haven't they?
+
+   Unix and C are the ultimate computer viruses.
+
+   A further benefit of the worse-is-better philosophy is that the
+   programmer is conditioned to sacrifice some safety, convenience, and
+   hassle to get good performance and modest resource use. Programs
+   written using the New Jersey approach will work well both in small
+   machines and large ones, and the code will be portable because it is
+   written on top of a virus.
+
+   It is important to remember that the initial virus has to be basically
+   good. If so, the viral spread is assured as long as it is portable.
+   Once the virus has spread, there will be pressure to improve it,
+   possibly by increasing its functionality closer to 90%, but users have
+   already been conditioned to accept worse than the right thing.
+   Therefore, the worse-is-better software first will gain acceptance,
+   second will condition its users to expect less, and third will be
+   improved to a point that is almost the right thing. In concrete terms,
+   even though Lisp compilers in 1987 were about as good as C compilers,
+   there are many more compiler experts who want to make C compilers
+   better than want to make Lisp compilers better.
+
+   The good news is that in 1995 we will have a good operating system and
+   programming language; the bad news is that they will be Unix and C++.
+
+   There is a final benefit to worse-is-better. Because a New Jersey
+   language and system are not really powerful enough to build complex
+   monolithic software, large systems must be designed to reuse
+   components. Therefore, a tradition of integration springs up.
+
+   How does the right thing stack up? There are two basic scenarios: the
+   ``big complex system scenario'' and the ``diamond-like jewel''
+   scenario.
+
+   The ``big complex system'' scenario goes like this:
+
+   First, the right thing needs to be designed. Then its implementation
+   needs to be designed. Finally it is implemented. Because it is the
+   right thing, it has nearly 100% of desired functionality, and
+   implementation simplicity was never a concern so it takes a long time
+   to implement. It is large and complex. It requires complex tools to use
+   properly. The last 20% takes 80% of the effort, and so the right thing
+   takes a long time to get out, and it only runs satisfactorily on the
+   most sophisticated hardware.
+
+   The ``diamond-like jewel'' scenario goes like this:
+
+   The right thing takes forever to design, but it is quite small at every
+   point along the way. To implement it to run fast is either impossible
+   or beyond the capabilities of most implementors.
+
+   The two scenarios correspond to Common Lisp and Scheme.
+
+   The first scenario is also the scenario for classic artificial
+   intelligence software.
+
+   The right thing is frequently a monolithic piece of software, but for
+   no reason other than that the right thing is often designed
+   monolithically. That is, this characteristic is a happenstance.
+
+   The lesson to be learned from this is that it is often undesirable to
+   go for the right thing first. It is better to get half of the right
+   thing available so that it spreads like a virus. Once people are hooked
+   on it, take the time to improve it to 90% of the right thing.
+
+   A wrong lesson is to take the parable literally and to conclude that C
+   is the right vehicle for AI software. The 50% solution has to be
+   basically right, and in this case it isn't.
+
+   But, one can conclude only that the Lisp community needs to seriously
+   rethink its position on Lisp design. I will say more about this later.
+
+   rpg@lucid.com
+
+References
+
+   1. https://web.archive.org/web/20000816191611/http%3A//www.ai.mit.edu/docs/articles/good-news/section3.2.html
+   2. https://web.archive.org/web/20000816191611/http%3A//www.ai.mit.edu/docs/articles/good-news/section3.2.html
+   3. https://web.archive.org/web/20000816191611/http%3A//www.ai.mit.edu/docs/articles/good-news/subsection3.2.2.html
diff --git a/doc/www.rodsbooks.com_efi-programming_hello.txt b/doc/www.rodsbooks.com_efi-programming_hello.txt
new file mode 100644
index 0000000..22609c4
--- /dev/null
+++ b/doc/www.rodsbooks.com_efi-programming_hello.txt
@@ -0,0 +1,307 @@
+                              Programming for EFI:
+                       Creating a "Hello, World" Program
+
+   by Roderick W. Smith, [1]rodsmith@rodsbooks.com
+
+   Originally written: 5/3/2013
+
+   I'm a technical writer and consultant specializing in Linux
+   technologies. This Web page is provided free of charge and with no
+   annoying outside ads; however, I did take time to prepare it, and Web
+   hosting does cost money. If you find this Web page useful, please
+   consider making a small donation to help keep this site up and running.
+   Thanks!
+
+   Donate $1.00 Donate $2.50 Donate $5.00 Donate $10.00 Donate another
+   value
+   PayPal - The safer, easier way to pay online! Donate with PayPal
+   PayPal - The safer, easier way to pay online! Donate with PayPal
+   PayPal - The safer, easier way to pay online! Donate with PayPal
+   PayPal - The safer, easier way to pay online! Donate with PayPal
+   PayPal - The safer, easier way to pay online! Donate with PayPal
+     __________________________________________________________________
+
+   Note: This page is a sub-page of my Programming for EFI document. If a
+   Web search has brought you here, you may want to start at the
+   [2]introductory page.
+
+   The traditional first program for a new compiler or environment is
+   "Hello, World." I therefore present such a program for EFI, including
+   the program itself, a Makefile for the program, and instructions on how
+   to compile and run it.
+
+Creating the Program File
+
+   A "Hello, World" program for EFI demonstrates some of the unique
+   features of EFI programming. To begin, consider the program itself:
+#include <efi.h>
+#include <efilib.h>
+
+EFI_STATUS
+EFIAPI
+efi_main (EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
+   InitializeLib(ImageHandle, SystemTable);
+   Print(L"Hello, world!\n");
+
+   return EFI_SUCCESS;
+}
+
+   If you want to try compiling the program itself, cut-and-paste the
+   preceding lines into a file called main.c; the Makefile presented
+   shortly assumes this name. Many of this program's features are similar
+   to those of a similar program for Linux, Windows, or other OSes and
+   environments; however, there are some significant differences, too:
+     * The program begins with two #include directives, which load EFI
+       header files from the GNU-EFI package. You should not normally
+       include regular C header files, such as stdlib.h, because most of
+       these header files define data types and functions that are used by
+       the C library. This library is not available in EFI, though, and
+       because of differences in compilation options, you can't easily
+       compile it into an EFI program. Instead, you must rely exclusively
+       on your development package's library. Also note that if you use
+       TianoCore's EDK II rather than GNU-EFI, you'll need to include
+       different (and usually more) header files than shown here.
+     * Instead of using an entry point function called main(), EFI
+       programs written with GNU-EFI use an entry point called efi_main().
+       This function takes two arguments, as shown in the sample program,
+       which point to the program's image and to the EFI system table,
+       respectively. (The EFI system table is the key to accessing most
+       EFI features, as described on the [3]Using EFI Services page.) If
+       you use the TianoCore EDK II toolkit, you can give the program's
+       entry point another name, but you must specify the entry point when
+       linking the program.
+     * The efi_main() function returns a value of type EFI_STATUS.
+       Possible return values are summarized in the [4]Phoenix EFI
+       documentation, among other places. The sample program always
+       returns EFI_SUCCESS, but a more complex program might return
+       another value.
+     * The definition of EFIAPI varies between platforms. It tells the
+       compiler to use EFI's calling conventions for the specified
+       function. Unfortunately for Linux programmers, EFI uses Microsoft's
+       application binary interface (ABI), rather than the System V (SysV)
+       ABI used by Linux, and therefore used by GCC by default. Functions
+       called from outside your own program, such as efi_main() and
+       functions you pass to the EFI (as drivers are likely to do, and as
+       you might do if you want to patch the EFI's service tables) must be
+       so identified. If you use the TianoCore toolkit, it uses the
+       Microsoft ABI internally, but GNU-EFI uses the SysV ABI internally.
+     * The call to InitializeLib() sets up some critical global variables
+       that are created by GNU-EFI and used to access the firmware's
+       features. Most importantly, the ST variable identifies the system
+       table, the BS variable refers to boot services, and RT refers to
+       runtime services. If you use the TianoCore EDK II, this call
+       doesn't exist, and the variables in question are generally referred
+       to as gST, gBS, and gRT, respectively.
+     * The EFI Print() call takes the place of the printf() function with
+       which you're probably familiar. You can use the two in a very
+       similar fashion, but this example program illustrates one
+       exception: Strings in EFI use a 16-bit encoding, so you must
+       precede string constants with the character L to denote a 16-bit
+       representation. When using string variables, they must normally be
+       defined as a CHAR16* type in order to display properly with Print()
+       or to be passed to most EFI functions. More advanced programs can
+       use common conversion specifiers with Print(), such as %s for
+       strings and %d for decimal numbers.
+
+   More complex programs will of course expose additional differences
+   between a C program written for most OSes and one written for EFI. Most
+   of these differences relate to library differences between EFI and
+   other environments.
+
+Creating the Makefile
+
+   If you were building a "Hello, World" program for Linux in a Linux
+   environment, you could compile it without a Makefile. Building the
+   program in Linux for EFI, though, is essentially a cross-compilation
+   operation. As such, it necessitates using unusual compilation and
+   linker options, as well as a post-linking operation to convert the
+   program into a form that the EFI will accept. Although you could type
+   all the relevant commands by hand, a Makefile helps a lot. Such a file
+   to build the preceding program file looks like this:
+ARCH            = $(shell uname -m | sed s,i[3456789]86,ia32,)
+
+OBJS            = main.o
+TARGET          = hello.efi
+
+EFIINC          = /usr/include/efi
+EFIINCS         = -I$(EFIINC) -I$(EFIINC)/$(ARCH) -I$(EFIINC)/protocol
+LIB             = /usr/lib64
+EFILIB          = /usr/lib64/gnuefi
+EFI_CRT_OBJS    = $(EFILIB)/crt0-efi-$(ARCH).o
+EFI_LDS         = $(EFILIB)/elf_$(ARCH)_efi.lds
+
+CFLAGS          = $(EFIINCS) -fno-stack-protector -fpic \
+                  -fshort-wchar -mno-red-zone -Wall
+ifeq ($(ARCH),x86_64)
+  CFLAGS += -DEFI_FUNCTION_WRAPPER
+endif
+
+LDFLAGS         = -nostdlib -znocombreloc -T $(EFI_LDS) -shared \
+                  -Bsymbolic -L $(EFILIB) -L $(LIB) $(EFI_CRT_OBJS)
+
+all: $(TARGET)
+
+hello.so: $(OBJS)
+        ld $(LDFLAGS) $(OBJS) -o $@ -lefi -lgnuefi
+
+%.efi: %.so
+        objcopy -j .text -j .sdata -j .data -j .dynamic \
+                -j .dynsym  -j .rel -j .rela -j .reloc \
+                --target=efi-app-$(ARCH) $^ $@
+
+   If you cut-and-paste this code into a text editor, be sure to convert
+   stretches of eight spaces to tabs! Also, be aware that you may need
+   to adjust the EFIINC, LIB, and EFILIB variables to point to the
+   relevant portions of your GNU-EFI installation directory. EFIINC
+   should, of course, point to your GNU-EFI include files; LIB should
+   point to the directory that holds the libefi.a and libgnuefi.a files;
+   and EFILIB should point to the directory that holds the
+   crt0-efi-x86_64.o and elf_x86_64_efi.lds files (or equivalents for
+   another architecture). The Makefile shown here works on a Fedora 18
+   installation. On Ubuntu 13.04, both LIB and EFILIB must be set to
+   /usr/lib; and on Gentoo, they must both be set to /usr/lib64.
+
+   The CFLAGS line sets a number of options that are important for getting
+   a working EFI binary. Although some of these could be omitted or
+   changed for the simple "Hello, World" demonstration, they can be
+   important for larger programs:
+     * -fno-stack-protector--Stack protection isn't supported by EFI, so
+       there's no point in building a binary with this feature active.
+     * -fpic--EFI requires that code be position-independent, hence the
+       use of this option.
+     * -fshort-wchar--GCC defines the wchar_t type to be 32 bits by
+       default, but EFI requires it to be 16 bits for 16-bit strings to
+       work correctly.
+     * -mno-red-zone--On x86-64 systems, the red zone is an area that
+       follows the stack pointer that can be used for temporary variables.
+       The EFI may modify this area, though, so it's not safe to use, and
+       you must compile EFI binaries with this option.
+     * -Wall--When developing EFI applications, you might want to pay
+       extra attention to compiler warnings, and this switch (which
+       enables most of the compiler's warning messages) can help.
+     * -DEFI_FUNCTION_WRAPPER--This option is required on the x86-64
+       platform, but is not defined on the 32-bit x86 platform. It relates
+       to the calling conventions for EFI functions, described on the
+       [5]Using EFI Services page.
+
+   Linker flags are defined in LDFLAGS, of course. They have the following
+   effects:
+     * -nostdlib--An EFI application should not be linked against standard
+       libraries, and this argument accomplishes this goal.
+     * -znocombreloc--This argument causes the linker to not combine
+       relocation sections.
+     * -T $(EFI_LDS)--To create an EFI binary, a non-standard linker
+       script must be used, and this option tells ld where to find it.
+     * -shared--Even with GNU-EFI's new linker script, ld can't create the
+       final executable. Instead, it creates a shared library, which is
+       subsequently turned into the final binary.
+     * -Bsymbolic--This option causes references to global symbols to be
+       bound to the definitions within the shared library.
+
+   When ld finishes its work, the result is a file called hello.so, which
+   is technically a shared library. To create an EFI executable, the
+   Makefile calls the objcopy program, which copies the code needed from
+   the library to create an EFI application.
+
+   If you use TianoCore EDK II, many of these options will be different.
+   One extremely important difference is the inclusion of the
+   -DEFIAPI=__attribute__((ms_abi)) GCC flag, which causes the binary to
+   be built using Microsoft's ABI. The linking process is also different;
+   the TianoCore EDK II includes its own program, called GenFw, that
+   builds the final binary instead of objcopy. TianoCore is designed
+   around a build process that doesn't use make, so you must either use
+   TianoCore's own build process or design a Makefile to mimic it. I don't
+   describe either approach here, so you should consult TianoCore's
+   documentation for the first option. If you want to use TianoCore with
+   make, check [6]rEFInd, which uses this approach, as a model.
+
+Compiling and Running the Program
+
+   Compiling the program is simple: Type make. The result should be
+   generation of intermediate files and the final hello.efi program file
+   of about 46KiB on a 64-bit system. If you encounter errors, you'll have
+   to fix them yourself. Be sure that your Makefile uses tabs where
+   necessary, and check the locations of the header and library files, as
+   already described. Note that the build process described here results
+   in a program file for the architecture you're using. If you build on a
+   64-bit system, the binary won't work on a 32-bit computer, and
+   vice-versa.
+
+   The best way to run the program is likely to be from an EFI shell. You
+   can download a binary from the TianoCore site, for both [7]64-bit
+   (x86-64) and [8]32-bit (x86) platforms. Rename these files as
+   shellx64.efi or shellia32.efi, respectively, and place them in the root
+   directory of your EFI System Partition (ESP). Some ESPs and boot
+   managers, such as [9]rEFInd and [10]gummiboot, recognize this name and
+   location as special, and will enable you to launch a shell when one
+   exists there. Other boot managers, such as GRUB, may require explicit
+   configuration to launch an EFI shell. Treat the shell like an OS's boot
+   loader. For instance, in GRUB 2 you might create an entry in
+   /etc/grub.d/40_custom like the following:
+menuentry "EFI shell" {
+        insmod part_gpt
+        insmod chain
+        set root='(hd0,gpt1)'
+        chainloader /shellx64.efi
+}
+
+   Note: If your computer boots with [11]Secure Boot active, you should
+   disable Secure Boot for your initial tests, since launching your own
+   applications on such a computer complicates matters.
+
+   Details will vary depending on your installation, though. Once you've
+   made these changes, use update-grub or grub-mkconfig to re-create your
+   grub.cfg file with the new entry to launch the EFI shell.
+
+   When you launch your EFI shell, you should type fs0: to change to the
+   first filesystem, which is normally the ESP. You can then type
+   hello.efi to launch the program. You should see its output, as in:
+Shell> fs0:
+
+fs0:\> hello.efi
+Hello, world!
+
+fs0:\> exit
+
+   If the program hangs or otherwise misbehaves, you may need to review
+   the code and build process. Unfortunately, debugging EFI applications
+   can be tedious, because the usual debugging tools don't work with them.
+   I find that using a virtual machine can help. VirtualBox, for instance,
+   supports EFI, so it's possible to install Linux under EFI on VirtualBox
+   and use it for testing EFI applications compiled in the host
+   environment. This procedure at least obviates the need to re-start your
+   editor or IDE after every test of your program. Ubuntu and Linux Mint
+   work well in this role because they both boot very quickly, which can
+   speed things up if you need to make a small but quick change to be
+   tested immediately.
+     __________________________________________________________________
+
+   [12]Go on to "Using EFI Services"
+
+   [13]Return to the "Programming for EFI" main page
+     __________________________________________________________________
+
+   copyright © 2013 by Roderick W. Smith
+
+   If you have problems with or comments about this Web page, please
+   e-mail me at [14]rodsmith@rodsbooks.com. Thanks.
+
+   [15]Return to my main Web page.
+
+References
+
+   1. mailto:rodsmith@rodsbooks.com
+   2. http://www.rodsbooks.com/efi-programming/index.html
+   3. http://www.rodsbooks.com/efi-programming/efi_services.html
+   4. http://wiki.phoenix.com/wiki/index.php/EFI_STATUS
+   5. http://www.rodsbooks.com/efi-programming/efi_services.html
+   6. http://www.rodsbooks.com/refind/
+   7. https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2/EdkShellBinPkg/FullShell/X64/Shell_Full.efi
+   8. https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2/EdkShellBinPkg/FullShell/Ia32/Shell_Full.efi
+   9. http://www.rodsbooks.com/refind/
+  10. http://freedesktop.org/wiki/Software/gummiboot
+  11. http://www.rodsbooks.com/efi-bootloaders/secureboot.html
+  12. http://www.rodsbooks.com/efi-programming/efi_services.html
+  13. http://www.rodsbooks.com/efi-programming/index.html
+  14. mailto:rodsmith@rodsbooks.com
+  15. http://www.rodsbooks.com/
diff --git a/src/Makefile b/src/Makefile
index eac4004..15018f6 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,7 +2,7 @@ CC := gcc
 DEFINES = -DOS_ABAOS
 OPT := -O0
 INCLUDES = -I. -Ilibc -Ihardware -Idrivers -Idrivers/hdi -Idrivers/hdi/ps2 -Idrivers/video -Inet -Idrivers/net -Ikernel -Igui
-CFLAGS := -std=c99 -m32 -march=i486 -ffreestanding -nostdlib $(OPT) -g -Werror $(INCLUDES) $(DEFINES)
+CFLAGS := -std=c99 -m32 -march=i386 -ffreestanding -nostdlib $(OPT) -g -Werror $(INCLUDES) $(DEFINES)
 LD := ld
 LDFLAGS := -m elf_i386
 NASMFLAGS := -f elf32
diff --git a/src/boot/stage2_a20.asm b/src/boot/stage2_a20.asm
index c0951ad..d9e2b36 100644
--- a/src/boot/stage2_a20.asm
+++ b/src/boot/stage2_a20.asm
@@ -62,6 +62,20 @@ check_and_enable_A20:
 	cmp ax, 1
 	je A20_ENABLED
 
+A20_FAST_SPECIAL_PORT:
+
+	mov al, 'F'
+	call print_char
+
+	in al, 0x92
+	or al, 2
+	out 0x92, al
+
+	call check_A20_enabled
+	cmp ax, 1
+	je A20_ENABLED
+	ret
+
 A20_ENABLE_KBD_PORT:
 	mov al, 'K'
 	call print_char
@@ -129,20 +143,6 @@ A20_ENABLE_KBD_OUT:
 	cmp ax, 1
 	je A20_ENABLED
 
-A20_FAST_SPECIAL_PORT:
-
-	mov al, 'F'
-	call print_char
-
-	in al, 0x92
-	or al, 2
-	out 0x92, al
-
-	call check_A20_enabled
-	cmp ax, 1
-	je A20_ENABLED
-	ret
-
 A20_ENABLE_VIA_BIOS:
 	mov al, 'B'
 	call print_char