summaryrefslogtreecommitdiff
path: root/miniany/doc/C Compiler, Part 9_ Functions.html
blob: 87a21791d9e8ae27d2e69ac7934acd097787dbdf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
<!DOCTYPE html>
<!-- saved from url=(0058)https://norasandler.com/2018/06/27/Write-a-Compiler-9.html -->
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <title>C Compiler, Part 9: Functions</title>
  <meta name="description" content="This is the ninth post in a series. Read part 1 here.">

  <link rel="stylesheet" href="./C Compiler, Part 9_ Functions_files/main.css">
  <link rel="canonical" href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html">
  <link rel="alternate" type="application/rss+xml" title="Nora Sandler" href="https://norasandler.com/feed.xml">
  
</head>


  <body>

    <header class="site-header" role="banner">

  <div class="wrapper">
    
    
    <a class="site-title" href="https://norasandler.com/">Nora Sandler</a>
  
    
      <nav class="site-nav">
        <input type="checkbox" id="nav-trigger" class="nav-trigger">
        <label for="nav-trigger">
          <span class="menu-icon">
            <svg viewBox="0 0 18 15" width="18px" height="15px">
              <path fill="#424242" d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.031C17.335,0,18,0.665,18,1.484L18,1.484z"></path>
              <path fill="#424242" d="M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0c0-0.82,0.665-1.484,1.484-1.484 h15.031C17.335,6.031,18,6.696,18,7.516L18,7.516z"></path>
              <path fill="#424242" d="M18,13.516C18,14.335,17.335,15,16.516,15H1.484C0.665,15,0,14.335,0,13.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.031C17.335,12.031,18,12.696,18,13.516L18,13.516z"></path>
            </svg>
          </span>
        </label>

        <div class="trigger">
          
            
            
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/about/">About</a>
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/archive/">Archive</a>
              
            
          
            
            
          
            
            
              
            
          
            
            
              
            
          
            
            
              
            
          
          <a class="page-link" href="https://github.com/nlsandler">Github</a>
          <a href="https://norasandler.com/feed.xml"><img id="rss" height="20" width="20" src="./C Compiler, Part 9_ Functions_files/rss.png"></a>

        </div>
      </nav>
    
  </div>
</header>


    <main class="page-content" aria-label="Content">
      <div class="wrapper">
        <article class="post h-entry" itemscope="" itemtype="http://schema.org/BlogPosting">

  <header class="post-header">
    <h1 class="post-title p-name" itemprop="name headline">C Compiler, Part 9: Functions</h1>
    <p class="post-meta">
      <time class="dt-published" datetime="2018-06-27T20:00:00+00:00" itemprop="datePublished">Jun 27, 2018
      </time></p>
  </header>

  <div class="post-content e-content" itemprop="articleBody">
    <p><em>This is the ninth post in a series. Read part 1 <a href="https://norasandler.com/2017/11/29/Write-a-Compiler.html">here</a>.</em></p>

<p>In this post we’re adding function calls! This is a particularly exciting post because we get to talk about calling conventions and stack frames and some weird corners of the C11 standard. Plus, by the end of this post we’ll be able to compile “Hello, World!” 🎉</p>

<p>As usual, accompanying tests are <a href="https://github.com/nlsandler/write_a_c_compiler">here</a>.</p>

<h1 id="part-9-functions">Part 9: Functions</h1>

<p>Of course, our compiler can already handle function definitions, because we can already define <code class="language-plaintext highlighter-rouge">main</code>.
But in this post, we’ll add support for function calls:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">three</span><span class="p">()</span> <span class="p">{</span>
    <span class="k">return</span> <span class="mi">3</span><span class="p">;</span>
<span class="p">}</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="k">return</span> <span class="n">three</span><span class="p">();</span>
<span class="p">}</span>
</code></pre></div></div>

<p>We’ll also add support for function parameters:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">sum</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">,</span> <span class="kt">int</span> <span class="n">b</span><span class="p">)</span> <span class="p">{</span>
    <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="n">b</span><span class="p">;</span>
<span class="p">}</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="k">return</span> <span class="n">sum</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
<span class="p">}</span>
</code></pre></div></div>
<p>And for forward declarations:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">sum</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">,</span> <span class="kt">int</span> <span class="n">b</span><span class="p">);</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="k">return</span> <span class="n">sum</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
<span class="p">}</span>

<span class="kt">int</span> <span class="nf">sum</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">,</span> <span class="kt">int</span> <span class="n">b</span><span class="p">)</span> <span class="p">{</span>
    <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="n">b</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<h3 id="terminology">Terminology</h3>

<ul>
  <li>
    <p>A function <strong>declaration</strong> specifies a function’s name, return type, and optionally its parameter list:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">();</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>A function <strong>prototype</strong> is a special type of function declaration that includes parameter type information:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">);</span>
</code></pre></div>    </div>
    <p>Function prototypes are the only function declarations we’ll support, even in places where the C11 standard allows non-prototype declarations.</p>
  </li>
  <li>
    <p>A function <strong>definition</strong> is a declaration plus a function body:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">)</span> <span class="p">{</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>

    <p>Note that you can declare a function as many times as you like, but you can only define it once<sup id="anchor1"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn1">1</a></sup>. Also note that whenever we say “all function declarations,” that includes function declarations that are part of function definitions.</p>
  </li>
  <li>
    <p>A <strong>forward declaration</strong> is a function declaration without a function body. It tells the compiler you’re going to define the function later, possibly in a different file, and lets you use a function before it’s defined.</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">);</span>
</code></pre></div>    </div>

    <p>You can also declare a function that has already been defined. This is legal but technically not a forward declaration…I guess it’s a backwards declaration? It would also be pretty pointless:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">()</span> <span class="p">{</span>
      <span class="k">return</span> <span class="mi">4</span><span class="p">;</span>
  <span class="p">}</span>

  <span class="kt">int</span> <span class="nf">foo</span><span class="p">();</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>A function’s <strong>arguments</strong> are the values passed to a function call. A function’s <strong>parameters</strong> are the variables defined in the function declaration. In this code snippet, <code class="language-plaintext highlighter-rouge">a</code> is a parameter and <code class="language-plaintext highlighter-rouge">3</code> is an argument:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">)</span> <span class="p">{</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>

  <span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
      <span class="k">return</span> <span class="n">foo</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
  <span class="p">}</span>
</code></pre></div>    </div>
  </li>
</ul>

<h3 id="limitations">Limitations</h3>

<ul>
  <li>
    <p>For now, we’ll only support functions with return type <code class="language-plaintext highlighter-rouge">int</code> and parameters with type <code class="language-plaintext highlighter-rouge">int</code>.</p>
  </li>
  <li>
    <p>We won’t support function declarations with missing parameters or type information; in other words, we’ll require all function declarations to be function prototypes, whether or not they’re part of function definitions.</p>
  </li>
  <li>
    <p>We’ll interpret an empty parameter list (e.g. in the declaration <code class="language-plaintext highlighter-rouge">int foo()</code>) to mean that the function has no parameters. This deviates from the C11 standard; according to the standard, <code class="language-plaintext highlighter-rouge">int foo(void)</code> is a function prototype indicating <code class="language-plaintext highlighter-rouge">foo</code> has no parameters, and <code class="language-plaintext highlighter-rouge">int foo()</code> is a declaration where the parameters aren’t specified (i.e. not a function prototype).</p>
  </li>
  <li>
    <p>We won’t support function definitions using identifier-list form, which looks like this:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="n">foo</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
  <span class="kt">int</span> <span class="n">a</span><span class="p">;</span>
  <span class="p">{</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">*</span> <span class="mi">2</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>We’ll require parameter names in function declarations. For example, we won’t support this:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span><span class="p">,</span> <span class="kt">int</span><span class="p">);</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>We won’t support storage class specifiers (e.g. <code class="language-plaintext highlighter-rouge">extern</code>, <code class="language-plaintext highlighter-rouge">static</code>), type qualifiers (e.g. <code class="language-plaintext highlighter-rouge">const</code>, <code class="language-plaintext highlighter-rouge">atomic</code>), function specifiers (<code class="language-plaintext highlighter-rouge">inline</code>, <code class="language-plaintext highlighter-rouge">_Noreturn</code>) or alignment specifiers (<code class="language-plaintext highlighter-rouge">_Alignas</code>)</p>
  </li>
</ul>

<h2 id="lexing">Lexing</h2>

<p>Nothing fancy here; we just need to add commas to separate the function arguments. Here’s the full list of tokens so far:</p>

<ul>
  <li><code class="language-plaintext highlighter-rouge">{</code></li>
  <li><code class="language-plaintext highlighter-rouge">}</code></li>
  <li><code class="language-plaintext highlighter-rouge">(</code></li>
  <li><code class="language-plaintext highlighter-rouge">)</code></li>
  <li><code class="language-plaintext highlighter-rouge">;</code></li>
  <li><code class="language-plaintext highlighter-rouge">int</code></li>
  <li><code class="language-plaintext highlighter-rouge">return</code></li>
  <li>Identifier <code class="language-plaintext highlighter-rouge">[a-zA-Z]\w*</code></li>
  <li>Integer literal <code class="language-plaintext highlighter-rouge">[0-9]+</code></li>
  <li><code class="language-plaintext highlighter-rouge">-</code></li>
  <li><code class="language-plaintext highlighter-rouge">~</code></li>
  <li><code class="language-plaintext highlighter-rouge">!</code></li>
  <li><code class="language-plaintext highlighter-rouge">+</code></li>
  <li><code class="language-plaintext highlighter-rouge">*</code></li>
  <li><code class="language-plaintext highlighter-rouge">/</code></li>
  <li><code class="language-plaintext highlighter-rouge">&amp;&amp;</code></li>
  <li><code class="language-plaintext highlighter-rouge">||</code></li>
  <li><code class="language-plaintext highlighter-rouge">==</code></li>
  <li><code class="language-plaintext highlighter-rouge">!=</code></li>
  <li><code class="language-plaintext highlighter-rouge">&lt;</code></li>
  <li><code class="language-plaintext highlighter-rouge">&lt;=</code></li>
  <li><code class="language-plaintext highlighter-rouge">&gt;</code></li>
  <li><code class="language-plaintext highlighter-rouge">&gt;=</code></li>
  <li><code class="language-plaintext highlighter-rouge">=</code></li>
  <li><code class="language-plaintext highlighter-rouge">if</code></li>
  <li><code class="language-plaintext highlighter-rouge">else</code></li>
  <li><code class="language-plaintext highlighter-rouge">:</code></li>
  <li><code class="language-plaintext highlighter-rouge">?</code></li>
  <li><code class="language-plaintext highlighter-rouge">for</code></li>
  <li><code class="language-plaintext highlighter-rouge">while</code></li>
  <li><code class="language-plaintext highlighter-rouge">do</code></li>
  <li><code class="language-plaintext highlighter-rouge">break</code></li>
  <li><code class="language-plaintext highlighter-rouge">continue</code></li>
  <li><strong><code class="language-plaintext highlighter-rouge">,</code></strong></li>
</ul>

<h4 id="-task">☑ Task:</h4>
<p>Add support for commas to the lexer.</p>

<h2 id="parsing">Parsing</h2>

<p>We’ll deal with function definitions first, then function calls.</p>

<h3 id="function-definitions">Function Definitions</h3>

<p>In our old definition, a function just had a name and a body:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>function_declaration = Function(string, block_item list) //string is the function name
</code></pre></div></div>

<p>Now we need to add a list of parameters. We also need to support declarations that don’t include a function body. I defined a single <code class="language-plaintext highlighter-rouge">function_declaration</code> AST rule, with an optional function body, to represent both declarations and definitions:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>function_declaration = Function(string, // function name
                                string list, // parameters
                                block_item list option) // body
</code></pre></div></div>

<p>But you could also have different rules for function declarations and definitions if you wanted.</p>

<p>Note that we don’t include the function’s return type or parameter types, because right now <code class="language-plaintext highlighter-rouge">int</code> is the only type. We’ll need to expand this definition when we add other types.</p>

<p>We also need to update the grammar. Here was the old <code class="language-plaintext highlighter-rouge">&lt;function&gt;</code> grammar rule:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;function&gt; ::= "int" &lt;id&gt; "(" ")" "{" { &lt;block-item&gt; } "}"
</code></pre></div></div>

<p>And here’s the new one. Note that the function declaration ends with either a function body (if it’s a definition) or a semicolon (if it’s not).</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;function&gt; ::= "int" &lt;id&gt; "(" [ "int" &lt;id&gt; { "," "int" &lt;id&gt; } ] ")" ( "{" { &lt;block-item&gt; } "}" | ";" )
</code></pre></div></div>

<h3 id="function-calls">Function Calls</h3>

<p>A function call is an expression that looks like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">foo</span><span class="p">(</span><span class="n">arg1</span><span class="p">,</span> <span class="n">arg2</span><span class="p">)</span>
</code></pre></div></div>

<p>It has an ID (the function name) and a list of arguments. Its arguments can be arbitrary expressions:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">foo</span><span class="p">(</span><span class="n">arg1</span> <span class="o">+</span> <span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="p">())</span>
</code></pre></div></div>

<p>So we can update the AST definition for expressions like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>exp = ...
    | FunCall(string, exp list) // string is the function name
    ...
</code></pre></div></div>

<p>We also need to update the grammar. Function calls have the highest possible precedence level, right up there with postfix unary operators.
So we’ll add them to the <code class="language-plaintext highlighter-rouge">&lt;factor&gt;</code> rule in the grammar:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;factor&gt; ::= &lt;function-call&gt; | "(" &lt;exp&gt; ")" | &lt;unary_op&gt; &lt;factor&gt; | &lt;int&gt; | &lt;id&gt;
&lt;function-call&gt; ::= id "(" [ &lt;exp&gt; { "," &lt;exp&gt; } ] ")"
</code></pre></div></div>

<h3 id="top-level">Top Level</h3>

<p>In our old definition, a program consisted of a single function definition. Now it needs to permit multiple function declarations:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>program = Program(function_declaration list)
</code></pre></div></div>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;program&gt; ::= { &lt;function&gt; }
</code></pre></div></div>

<h4 id="-task-1">☑ Task:</h4>
<p>Update parsing to succeed on all valid stage 1-9 examples. You may or may not want to handle invalid examples here: see the next section on validation.</p>

<h2 id="validation">Validation</h2>

<p>We need to validate that the function declarations and calls in our program are legal. You can either handle these checks during code generation, or add a new validation pass between parsing and code generation. <strong>Edited to add:</strong> I previously recommended performing validation during the parsing stage. This turns out to be a bad idea, because this will become increasingly cumbersome as we need to validate more things in future posts.</p>

<p>Your compiler must fail if:</p>

<ul>
  <li>
    <p>The program includes two definitions of the same function name.</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(){</span>
      <span class="k">return</span> <span class="mi">3</span><span class="p">;</span>
  <span class="p">}</span>

  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">){</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>Two declarations of a function have different numbers of parameters. Different parameter names are okay, though.</p>

    <p>This is illegal<sup id="anchor2"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn2">2</a></sup>:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">,</span> <span class="kt">int</span> <span class="n">b</span><span class="p">);</span>

  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">){</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>

    <p>But this is okay:</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">);</span>

  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">b</span><span class="p">){</span>
      <span class="k">return</span> <span class="n">b</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>A function is called with the wrong number of arguments, e.g.</p>

    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">){</span>
      <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
  <span class="p">}</span>

  <span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
      <span class="k">return</span> <span class="n">foo</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">);</span>
  <span class="p">}</span>
</code></pre></div>    </div>
  </li>
  <li>
    <p>Optionally, you may want to fail if a function is called before it’s declared. Note that it’s totally legal to call a function that has been declared but not defined. It’s also legal to declare a function and <em>never</em> define it; however, linking will fail if the function isn’t declared in some other library the linker can find<sup id="anchor3"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn3">3</a></sup>.</p>

    <p>So this is illegal:</p>
    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
      <span class="k">return</span> <span class="n">putchar</span><span class="p">(</span><span class="mi">65</span><span class="p">);</span>
  <span class="p">}</span>

  <span class="kt">int</span> <span class="nf">foo</span><span class="p">(){</span>
      <span class="k">return</span> <span class="mi">3</span><span class="p">;</span>
  <span class="p">}</span>
</code></pre></div>    </div>

    <p>But this is legal:</p>
    <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code>  <span class="kt">int</span> <span class="nf">putchar</span><span class="p">(</span><span class="kt">int</span> <span class="n">c</span><span class="p">);</span>

  <span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
      <span class="n">putchar</span><span class="p">(</span><span class="mi">65</span><span class="p">);</span>
  <span class="p">}</span>
</code></pre></div>    </div>

    <p>This last point is optional because neither GCC nor clang enforces it — they both warn but don’t fail on the illegal example above. Calling a function before it’s declared is called “implicit function declaration” and it was legal before C99, so I guess enforcing this rule would have broken a lot of older code. The test suite doesn’t include any implicit function declarations, so you can handle it however you like and you can still pass all the tests.</p>
  </li>
</ul>

<h4 id="-task-2">☑ Task:</h4>
<p>Update your compiler to fail on invalid stage 1-9 examples. You can handle this during code generation, or a new stage between parsing and code generation. Bonus points for useful error messages.</p>

<p>To handle this, you’ll probably want to traverse the tree and maintain a map to track the number of arguments to each function, and whether that function has been defined yet.</p>

<h2 id="code-generation">Code Generation</h2>

<p>Once again, we’ll handle function definitions first, then function calls. But before we do any of that, let’s discuss…</p>

<h3 id="calling-conventions">Calling Conventions</h3>

<p>In most of the examples above, we defined a function and then called it in the same file. But we also want to call functions from shared libraries; we particularly want to call the standard library, so we can access I/O functions, so we can write “Hello, World”. When you use a shared library, you generally don’t recompile it yourself; you link to a precompiled binary. We definitely don’t want to recompile the whole standard library! That means we need to generate machine code that can interact with object files built by other compilers. In earlier posts, I’ve often said “this isn’t how a real compiler would do this thing, but it works.” In this post, we <em>have</em> to do things the same way as everyone else or we can’t use prebuilt libraries.</p>

<p>In other words, we need to follow the appropriate <em>calling convention</em>. A calling convention answers questions like:</p>

<ul>
  <li>How are arguments passed to the callee? Are they passed in registers or on the stack?</li>
  <li>Is the caller or callee responsible for removing arguments from the stack after the callee has executed?</li>
  <li>How are return values passed back to the caller?</li>
  <li>Which registers are caller-saved and which are callee-saved<sup id="anchor4"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn4">4</a></sup>?</li>
</ul>

<p>C programs on 32-bit OS X, Linux, and other Unix-like systems use the <code class="language-plaintext highlighter-rouge">cdecl</code> calling convention<sup id="anchor5"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn5">5</a></sup>, which means:</p>

<ul>
  <li>Arguments are passed on the stack. They’re pushed on the stack from right to left (so the first function argument is at the lowest address).</li>
  <li>The caller cleans the arguments from the stack.</li>
  <li>Return values are passed in the <code class="language-plaintext highlighter-rouge">EAX</code> register. (The full answer is more complicated, but this is good enough as long as we can only return integers.)</li>
  <li>The <code class="language-plaintext highlighter-rouge">EAX</code>, <code class="language-plaintext highlighter-rouge">ECX</code>, and <code class="language-plaintext highlighter-rouge">EDX</code> registers are caller-saved, and all others are callee-saved. We’ll see in the next section that the callee has to restore <code class="language-plaintext highlighter-rouge">EBP</code> and <code class="language-plaintext highlighter-rouge">ESP</code> before it returns, and restores <code class="language-plaintext highlighter-rouge">EIP</code> with the <code class="language-plaintext highlighter-rouge">ret</code> instruction. Normally it would also need to restore <code class="language-plaintext highlighter-rouge">ESI</code>, <code class="language-plaintext highlighter-rouge">EDI</code>, and <code class="language-plaintext highlighter-rouge">EBX</code>, but we don’t actually use these registers. And we already push values from <code class="language-plaintext highlighter-rouge">EAX</code>, <code class="language-plaintext highlighter-rouge">ECX</code>, and <code class="language-plaintext highlighter-rouge">EDX</code> onto the stack right away if we’re going to need them later. So basically, we don’t have to worry about saving and restoring registers at all.</li>
</ul>

<p>There are two import differences between OS X and Linux:</p>

<ul>
  <li>Stack alignment. On OS X, the stack needs to be 16-byte aligned at the beginning of a function call (i.e. when the <code class="language-plaintext highlighter-rouge">call</code> instruction is issued)<sup id="anchor6"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn6">6</a></sup>. This isn’t required on Linux, but GCC still keeps the stack 16-byte aligned<sup id="anchor7"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn7">7</a></sup>.</li>
  <li>Name decoration. On OS X, function names in assembly are prepended with an underscore (e.g. <code class="language-plaintext highlighter-rouge">main</code> becomes <code class="language-plaintext highlighter-rouge">_main</code>). On systems that use the ELF file format (Linux and most other *nix systems), there’s no underscore.  This isn’t part of the calling convention per se but it is important.</li>
</ul>

<p>We’ll need to be really comfortable with all this to implement it ourselves, so let’s look at…</p>

<h3 id="cdecl-function-calls-in-excruciating-detail">cdecl Function Calls in Excruciating Detail</h3>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">foo</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">);</span>
</code></pre></div></div>

<p>What, exactly, happens when your computer executes this line of code? We touched on this in <a href="https://norasandler.com/2018/01/08/Write-a-Compiler-5.html">part 5</a>, but now we’ll dig into it a lot more. We won’t worry about keeping the stack 16-byte aligned for now.</p>

<p>We’ll say that <code class="language-plaintext highlighter-rouge">foo</code> is being called from another function, <code class="language-plaintext highlighter-rouge">bar</code>. The line of C above will get turned into this assembly:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nf">push</span> <span class="kc">$</span><span class="mi">3</span>
<span class="nf">push</span> <span class="kc">$</span><span class="mi">2</span>
<span class="nf">push</span> <span class="kc">$</span><span class="mi">1</span>
<span class="nf">call</span> <span class="nv">_foo</span>
<span class="nf">add</span> <span class="kc">$</span><span class="mh">0xc</span><span class="p">,</span> <span class="o">%</span><span class="nb">esp</span>
</code></pre></div></div>

<p>First, let’s look at the state of the world before we start calling <code class="language-plaintext highlighter-rouge">foo</code><sup id="anchor8"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn8">8</a></sup>:</p>

<p><img src="./C Compiler, Part 9_ Functions_files/before_function_call.svg" alt="EBP points at the base of bar&#39;s stack frame at 0x14. ESP is 4 bytes below it at 0x10. EIP points at &quot;pushl $3&quot;."></p>

<p>One chunk of memory contains the stack frame, which we’re already familiar with. The <code class="language-plaintext highlighter-rouge">EBP</code> and <code class="language-plaintext highlighter-rouge">ESP</code> registers point to the bottom and top of the stack frame, respectively, so the processor can figure out where the stack is.</p>

<p>Another chunk of memory, which we haven’t talked about yet, contains the CPU instructions being executed. The <code class="language-plaintext highlighter-rouge">EIP</code> register contains the memory address of the current instruction. To advance to the next instruction, the CPU just increments <code class="language-plaintext highlighter-rouge">EIP</code><sup id="anchor9"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn9">9</a></sup>. The <code class="language-plaintext highlighter-rouge">call</code> instruction, and all the jump instructions we’ve already encountered, work by manipulating EIP. In these diagrams I’ll show <code class="language-plaintext highlighter-rouge">EIP</code> pointing to the instruction we’re about to execute.</p>

<p>When <code class="language-plaintext highlighter-rouge">bar</code> wants to call <code class="language-plaintext highlighter-rouge">foo</code>, the first step is putting the function arguments on the stack where <code class="language-plaintext highlighter-rouge">foo</code> can find them<sup id="anchor10"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn10">10</a></sup>. They’re pushed onto the stack in reverse order<sup id="anchor11"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn11">11</a>:</sup></p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nf">push</span> <span class="kc">$</span><span class="mi">3</span>
<span class="nf">push</span> <span class="kc">$</span><span class="mi">2</span>
<span class="nf">push</span> <span class="kc">$</span><span class="mi">1</span>
</code></pre></div></div>

<p>Which means the world now looks like this:</p>

<p><img src="./C Compiler, Part 9_ Functions_files/before_function_call_args_pushed.svg" alt="Values 3, 2, and 1 have been pushed onto the stack, in that order. ESP points to memory address 0x20, which holds value 1. EIP points at &quot;call _foo&quot;. EBP is unchanged."></p>

<p>Next <code class="language-plaintext highlighter-rouge">bar</code> issues the <code class="language-plaintext highlighter-rouge">call</code> instruction, which does two things:</p>

<ol>
  <li>Push the address of the instruction <em>after</em> <code class="language-plaintext highlighter-rouge">call</code> (the “return address”) onto the stack.</li>
  <li>Jump to <code class="language-plaintext highlighter-rouge">_foo</code> (by moving the address of <code class="language-plaintext highlighter-rouge">_foo</code> into <code class="language-plaintext highlighter-rouge">EIP</code>).</li>
</ol>

<p>Now the world looks like this:</p>

<p><img src="./C Compiler, Part 9_ Functions_files/after_call.svg" alt="ESP points to 0x24, which holds the return address: address of the instruction just after &quot;call _foo&quot;. EIP points to the first instruction in _foo. EBP is unchanged."></p>

<p>Okay, we’re officially in <code class="language-plaintext highlighter-rouge">foo</code> now. Next step is the function prologue to set up a new stack frame:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nf">push</span> <span class="o">%</span><span class="nb">ebp</span>
<span class="nf">mov</span> <span class="o">%</span><span class="nb">esp</span><span class="p">,</span> <span class="o">%</span><span class="nb">ebp</span>
</code></pre></div></div>

<p><img src="./C Compiler, Part 9_ Functions_files/after_function_prologue.svg" alt="ESP and EBP both point at 0x28, which holds the previous value of EBP (0x10)."></p>

<p>Now we can execute the body of <code class="language-plaintext highlighter-rouge">foo</code>. We can access its parameters because they’re at a predictable location on the stack relative to <code class="language-plaintext highlighter-rouge">EBP</code>: <code class="language-plaintext highlighter-rouge">%ebp + 0x8</code>, <code class="language-plaintext highlighter-rouge">%ebp + 0xc</code>, and <code class="language-plaintext highlighter-rouge">%ebp + 0x10</code>, respectively.</p>

<p>Once we’ve done some things in <code class="language-plaintext highlighter-rouge">foo</code>, and placed a return value in <code class="language-plaintext highlighter-rouge">EAX</code>, it’s time to return to <code class="language-plaintext highlighter-rouge">bar</code>. Except for that return value, we want everything on the stack to be exactly the same as it was before the call. The first step is to run the function epilogue to restore the old stack frame:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nf">mov</span> <span class="o">%</span><span class="nb">ebp</span><span class="p">,</span> <span class="o">%</span><span class="nb">esp</span> <span class="c1">; deallocate any local variables on the stack</span>
<span class="nf">pop</span> <span class="o">%</span><span class="nb">ebp</span>        <span class="c1">; restore old EBP</span>
</code></pre></div></div>

<p>The stack now looks exactly the same as it did right after the <code class="language-plaintext highlighter-rouge">call</code> instruction, before the function prologue. That means the return address is on top of the stack again.</p>

<p>Then we execute the <code class="language-plaintext highlighter-rouge">ret</code> instruction, which pops the top value off the stack and jumps to it unconditionally (i.e. copies it into <code class="language-plaintext highlighter-rouge">EIP</code>).</p>

<p><img src="./C Compiler, Part 9_ Functions_files/after_ret.svg" alt="ESP points at 0x20, which holds function argument 1. EIP points to the address of the instruction right after &quot;_call foo&quot;."></p>

<p>Now we just have to remove the function arguments from the stack, and we’re done. No need to pop them off one by one; we can just adjust the value of <code class="language-plaintext highlighter-rouge">ESP</code>.</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nf">add</span> <span class="kc">$</span><span class="mh">0xc</span><span class="p">,</span> <span class="o">%</span><span class="nb">esp</span>
</code></pre></div></div>

<p>Now the stack has been restored to exactly the way it was before the call, and we can proceed with the rest of <code class="language-plaintext highlighter-rouge">bar</code>.</p>

<p>And now we’re finally ready to implement the code-generation stage of the compiler!</p>

<h3 id="function-definitions-1">Function Definitions</h3>

<p>As with <code class="language-plaintext highlighter-rouge">main</code>, we want to make each function global (so it can be called from other files) and label it:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code>    <span class="nf">.globl</span> <span class="nv">_fun</span>
<span class="nl">_fun:</span>
</code></pre></div></div>

<p>Make sure to include the leading underscore before the function name if you’re on OS X, and not otherwise.</p>

<p>We already know how to generate the function prologue and epilogue, because that’s also exactly the same as <code class="language-plaintext highlighter-rouge">main</code>. We just need to add all the function parameters to <code class="language-plaintext highlighter-rouge">var_map</code> and <code class="language-plaintext highlighter-rouge">current_scope</code>. As we saw above, the first paramter will be at <code class="language-plaintext highlighter-rouge">ebp + 8</code>, and each subsequent parameter will be four bytes higher than the last:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>param_offset = 8 // first parameter is at EBP + 8
for each function parameter:
    var_map.put(parameter, param offset)
    current_scope.add(parameter)
    param_offset += 4
</code></pre></div></div>

<p>Then parameters get handled like any other variable in the function body.</p>

<h3 id="function-prototypes">Function Prototypes</h3>

<p>We don’t generate any assembly for function prototypes that aren’t part of definitions.</p>

<h3 id="function-calls-1">Function Calls</h3>

<p>As we saw above, the caller needs to:</p>

<ol>
  <li>
    <p>Put the arguments on the stack, in reverse order<sup id="anchor12"><a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#fn12">12</a></sup>:</p>

    <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code> for each argument in reversed(function_call.arguments):
     generate_exp(arg) // puts arg in eax
     emit 'pushl %eax'
</code></pre></div>    </div>
  </li>
  <li>
    <p>Issue the <code class="language-plaintext highlighter-rouge">call</code> instruction.</p>

    <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>     emit 'call _{}'.format(function_name)
</code></pre></div>    </div>
  </li>
  <li>
    <p>Remove the arguments from the stack after the callee returns.</p>

    <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>     bytes_to_remove = 4 * number of function arguments
     emit 'addl ${}, %esp'.format(bytes_to_remove)
</code></pre></div>    </div>
  </li>
</ol>

<h4 id="stack-alignment">Stack Alignment</h4>
<p>On OS X, the stack needs to be 16-byte aligned when the call instruction is issued. A normal C compiler would know exactly how much padding to add to maintain that alignment. But because we push intermediate results of expressions onto the stack, and function calls can occur within larger expressions, we have no idea where the stack pointer is when we encounter a function call. My solution was to emit assembly just before each function call that calculates how much padding is needed, subtracts from ESP accordingly, and then pushes the result of the padding calculation onto the stack, all before putting the function arguments on the stack. After the function returns, the caller first removes the arguments, then pops off the result of the padding calculation, and finally adds that value to ESP to restore it to its original state.</p>

<p>Here’s the assembly to do that:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code>    <span class="nf">movl</span> <span class="o">%</span><span class="nb">esp</span><span class="p">,</span> <span class="o">%</span><span class="nb">eax</span>
    <span class="nf">subl</span> <span class="kc">$</span><span class="nv">n</span><span class="p">,</span> <span class="o">%</span><span class="nb">eax</span>    <span class="c1">; n = (4*(arg_count + 1)), # of bytes allocated for arguments + padding value itself</span>
                     <span class="c1">; eax now contains the value ESP will have when call instruction is executed</span>
    <span class="nf">xorl</span> <span class="o">%</span><span class="nb">edx</span><span class="p">,</span> <span class="o">%</span><span class="nb">edx</span>  <span class="c1">; zero out EDX, which will contain remainder of division</span>
    <span class="nf">movl</span> <span class="kc">$</span><span class="mh">0x20</span><span class="p">,</span> <span class="o">%</span><span class="nb">ecx</span> <span class="c1">; 0x20 = 16</span>
    <span class="nf">idivl</span> <span class="o">%</span><span class="nb">ecx</span>       <span class="c1">; calculate eax / 16. EDX contains remainder, i.e. # of bytes to subtract from ESP </span>
    <span class="nf">subl</span> <span class="o">%</span><span class="nb">edx</span><span class="p">,</span> <span class="o">%</span><span class="nb">esp</span>  <span class="c1">; pad ESP</span>
    <span class="nf">pushl</span> <span class="o">%</span><span class="nb">edx</span>       <span class="c1">; push padding result onto stack; we'll need it to deallocate padding later</span>
    <span class="c1">; ...push arguments, call function, remove arguments...</span>
    <span class="nf">popl</span> <span class="o">%</span><span class="nb">edx</span>        <span class="c1">; pop padding result</span>
    <span class="nf">addl</span> <span class="o">%</span><span class="nb">edx</span><span class="p">,</span> <span class="o">%</span><span class="nb">esp</span>  <span class="c1">; remove padding</span>
</code></pre></div></div>

<p>This solution is kind of hideous, so let me know if you come up with a better one.</p>

<h3 id="top-level-1">Top Level</h3>

<p>Obviously, you need to generate assembly for every function definition, not just one.</p>

<h4 id="-task-3">☑ Task:</h4>
<p>Update your compiler to handle all stage 9 examples. Make sure it produces the right return code <em>and</em>, for the “hello world” test case, the right output to stdout.</p>

<h2 id="fibonacci--hello-world">Fibonacci &amp; Hello, World!</h2>

<p>Now we can calculate Fibonacci numbers:</p>
<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">fib</span><span class="p">(</span><span class="kt">int</span> <span class="n">n</span><span class="p">)</span> <span class="p">{</span>
    <span class="k">if</span> <span class="p">(</span><span class="n">n</span> <span class="o">==</span> <span class="mi">0</span> <span class="o">||</span> <span class="n">n</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span> <span class="p">{</span>
        <span class="k">return</span> <span class="n">n</span><span class="p">;</span>
    <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
        <span class="k">return</span> <span class="n">fib</span><span class="p">(</span><span class="n">n</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">fib</span><span class="p">(</span><span class="n">n</span> <span class="o">-</span> <span class="mi">2</span><span class="p">);</span>
    <span class="p">}</span>
<span class="p">}</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="kt">int</span> <span class="n">n</span> <span class="o">=</span> <span class="mi">10</span><span class="p">;</span>
    <span class="k">return</span> <span class="n">fib</span><span class="p">(</span><span class="n">n</span><span class="p">);</span>
<span class="p">}</span>
</code></pre></div></div>

<p>We can also make calls to the standard library! Since we only know about ints, we can only call standard library functions where the parameters are all ints and the return value is also an int. Lucky for us, <code class="language-plaintext highlighter-rouge">putchar</code> is just such a function. For example, since the ASCII value of ‘A’ is 65, we could print ‘A’ to standard out like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">65</span><span class="p">);</span>
<span class="p">}</span>
</code></pre></div></div>

<p>And we can print out ‘Hello, World!’ like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">putchar</span><span class="p">(</span><span class="kt">int</span> <span class="n">c</span><span class="p">);</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">72</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">101</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">108</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">108</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">111</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">44</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">32</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">87</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">111</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">114</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">108</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">100</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">33</span><span class="p">);</span>
    <span class="n">putchar</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="p">}</span>
</code></pre></div></div>

<h2 id="up-next">Up Next</h2>

<p>My next post or two won’t be about compilers. After that I’ll get back to this series, but I haven’t decided what to implement next. Maybe pointers? We’ll see!</p>

<p><strong>Update:</strong> just kidding, the <a href="https://norasandler.com/2019/02/18/Write-a-Compiler-10.html">next post</a> is about compilers after all, and covers global variables.</p>

<p><em>If you have any questions, corrections, or other feedback, you can <a href="mailto:nora@norasandler.com">email me</a> or <a href="https://github.com/nlsandler/write_a_c_compiler/issues">open an issue</a>.</em></p>

<div class="footnote">
  <p><sup id="fn1">1</sup>
Technically, you can redefine a function in the same <em>program</em> but not in the same <em>translation unit</em>. A translation unit is a source file plus everything that gets pulled in during preprocessing from <code class="language-plaintext highlighter-rouge">#include</code> directives. (Source: <a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf">C11 standard</a>, section 5.1.1.1)</p>

  <p>So it’s legal to redefine a function from a linked library. But linking happens after the compiler runs, so for our purposes the rule is that each function can only be defined once.<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor1">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn2">2</sup>
However, this is legal according to C11:</p>

  <div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">foo</span><span class="p">();</span>

<span class="kt">int</span> <span class="nf">foo</span><span class="p">(</span><span class="kt">int</span> <span class="n">a</span><span class="p">){</span>
    <span class="k">return</span> <span class="n">a</span> <span class="o">+</span> <span class="mi">1</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div>  </div>

  <p>That’s because <code class="language-plaintext highlighter-rouge">int foo();</code> doesn’t mean “declare a function foo with no variables”; it means “declare a function foo, but we don’t know anything about its variables.” But our compiler diverges from the standard in this respect; it assumes that <code class="language-plaintext highlighter-rouge">int foo();</code> means “declare foo with no variables,” so it will fail here.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor2">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn3">3</sup>
What the linker does and where it looks for function definitions is way beyond the scope of this blog post; if you want to learn more you might like the <a href="http://www.lurklurk.org/linkers/linkers.html">Beginner’s Guide to Linkers</a> or <a href="https://lwn.net/Articles/276782/">this series on linkers</a>.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor3">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn4">4</sup>
If a register is caller-saved, that means the callee is allowed to overwrite it. So if the caller wants to access the value in that register after the callee returns, it needs to push that value onto the stack, then pop it back into the register after the function call has completed.</p>

  <p>If a register is callee-saved, the caller can assume that the register will be unchanged after the function call finishes. So if the callee wants to use that register, it has to save the register’s contents to the stack and restore those contents before returning control to the caller.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor4">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn5">5</sup>
Windows is a lot more complicated; sometimes it uses <code class="language-plaintext highlighter-rouge">cdecl</code>, sometimes it uses different calling conventions. A lot of Linux/OS X documentation doesn’t even call it <code class="language-plaintext highlighter-rouge">cdecl</code>, presumably because it’s the only calling convention in *nix-world.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor5">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn6">6</sup>
Source: <a href="https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/LowLevelABI/130-IA-32_Function_Calling_Conventions/IA32.html">OS X ABI Function Call Guide</a>. It’s not 100% clear why OS X imposes this requirement but it probably has something to do with <a href="https://stackoverflow.com/questions/612443/why-does-the-mac-abi-require-16-byte-stack-alignment-for-x86-32">making SSE instructions run faster</a>.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor6">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn7">7</sup>
See the <a href="https://gcc.gnu.org/onlinedocs/gcc-2.95.2/gcc_2.html#SEC31">GCC documentation</a> on <code class="language-plaintext highlighter-rouge">-mpreferred-stack-boundary</code>.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor7">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn8">8</sup>
Note that these are not valid memory addresses; at least on Linux, the lowest memory address in use is 0x08048000. (See <a href="https://stackoverflow.com/questions/7187981/whats-the-memory-before-0x08048000-used-for-in-32-bit-machine">here</a> and <a href="https://stackoverflow.com/questions/12488010/why-the-entry-point-address-in-my-executable-is-0x8048330-0x330-being-offset-of">here</a>). I think this is also true on OS X but I haven’t checked.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor8">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn9">9</sup>
It’s actually a little more complicated than this; instructions are variable-width, so you can’t increment EIP by the same amount for every instruction.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor9">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn10">10</sup>
Actually, the first step is pushing some caller-saved registers onto the stack. But, like I mentioned earlier, the janky way we’re managing registers means we can ignore this.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor10">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn11">11</sup>
Pushing arguments onto the stack in reverse order makes it easier to handle functions with a variable number of arguments; the callee knows the location of the first argument even if it doesn’t know how many arguments there are.
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor11">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn12">12</sup>
This means we’ll also <em>evaluate</em> the arguments in reverse order. This is valid; function arguments may be evaluated in any order. (Source: <a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf">C11 standard</a> section 6.5.2.2, paragraph 10.)
<a href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html#anchor12">↩</a></p>
</div>

  </div><a class="u-url" href="https://norasandler.com/2018/06/27/Write-a-Compiler-9.html" hidden=""></a>
</article>

      </div>
    </main>

    <footer class="site-footer">

  <div class="wrapper">
      <div class="footer-col-wrapper">
        <div class="footer-col footer-col-1">
            <div class="rc-scout" data-scout-rendered="true"><p class="rc-scout__text"><i class="rc-scout__logo"></i> Want to become a better programmer? <a class="rc-scout__link" href="https://www.recurse.com/scout/click?t=8f520efbc4be09fb83a71920f53a07b7">Join the Recurse Center!</a></p></div><script async="" defer="" src="./C Compiler, Part 9_ Functions_files/loader.js"></script>
        </div>
      </div>
    <div class="footer-col-wrapper">
      <div class="footer-col footer-col-1">
        © 2023 Nora Sandler.
      </div>
    </div>
  </div>

</footer>


  


<script async="" src="./C Compiler, Part 9_ Functions_files/scout-176be16681a03cbddd686f3cc96694d3aab338ea5cb65452f83d989309810528.js"></script><style class="rc-scout__style" type="text/css">.rc-scout {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
}
.rc-scout__text {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
  height: 100%;
  font-size: 100%;
}
.rc-scout__logo {
  display: inline-block;
  padding: 0;
  border: 0;
  margin: 0;
  width: 0.85em;
  height: 0.85em;
  background: no-repeat center url('data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%2012%2015%22%3E%3Crect%20x%3D%220%22%20y%3D%220%22%20width%3D%2212%22%20height%3D%2210%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%221%22%20width%3D%2210%22%20height%3D%228%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%222%22%20width%3D%228%22%20height%3D%226%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%229%22%20width%3D%224%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%2211%22%20width%3D%2210%22%20height%3D%224%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%220%22%20y%3D%2212%22%20width%3D%2212%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%225%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%227%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%228%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%229%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3C%2Fsvg%3E');
}
.rc-scout__link:link, .rc-scout__link:visited {
  color: #3dc06c;
  text-decoration: underline;
}
.rc-scout__link:hover, .rc-scout__link:active {
  color: #4e8b1d;
}
</style></body></html>