summaryrefslogtreecommitdiff
path: root/miniany/doc/Writing a C Compiler, Part 6.html
blob: ed0ff8a4a4416baf92ff9d82f2870e7e457aab4e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
<!DOCTYPE html>
<!-- saved from url=(0058)https://norasandler.com/2018/02/25/Write-a-Compiler-6.html -->
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <title>Writing a C Compiler, Part 6</title>
  <meta name="description" content="This is the sixth post in a series. Read part 1 here.">

  <link rel="stylesheet" href="./Writing a C Compiler, Part 6_files/main.css">
  <link rel="canonical" href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html">
  <link rel="alternate" type="application/rss+xml" title="Nora Sandler" href="https://norasandler.com/feed.xml">
  
</head>


  <body>

    <header class="site-header" role="banner">

  <div class="wrapper">
    
    
    <a class="site-title" href="https://norasandler.com/">Nora Sandler</a>
  
    
      <nav class="site-nav">
        <input type="checkbox" id="nav-trigger" class="nav-trigger">
        <label for="nav-trigger">
          <span class="menu-icon">
            <svg viewBox="0 0 18 15" width="18px" height="15px">
              <path fill="#424242" d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.031C17.335,0,18,0.665,18,1.484L18,1.484z"></path>
              <path fill="#424242" d="M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0c0-0.82,0.665-1.484,1.484-1.484 h15.031C17.335,6.031,18,6.696,18,7.516L18,7.516z"></path>
              <path fill="#424242" d="M18,13.516C18,14.335,17.335,15,16.516,15H1.484C0.665,15,0,14.335,0,13.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.031C17.335,12.031,18,12.696,18,13.516L18,13.516z"></path>
            </svg>
          </span>
        </label>

        <div class="trigger">
          
            
            
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/about/">About</a>
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/archive/">Archive</a>
              
            
          
            
            
          
            
            
              
            
          
            
            
              
            
          
            
            
              
            
          
          <a class="page-link" href="https://github.com/nlsandler">Github</a>
          <a href="https://norasandler.com/feed.xml"><img id="rss" height="20" width="20" src="./Writing a C Compiler, Part 6_files/rss.png"></a>

        </div>
      </nav>
    
  </div>
</header>


    <main class="page-content" aria-label="Content">
      <div class="wrapper">
        <article class="post h-entry" itemscope="" itemtype="http://schema.org/BlogPosting">

  <header class="post-header">
    <h1 class="post-title p-name" itemprop="name headline">Writing a C Compiler, Part 6</h1>
    <p class="post-meta">
      <time class="dt-published" datetime="2018-02-25T20:00:00+00:00" itemprop="datePublished">Feb 25, 2018
      </time></p>
  </header>

  <div class="post-content e-content" itemprop="articleBody">
    <p><em>This is the sixth post in a series. Read part 1 <a href="https://norasandler.com/2017/11/29/Write-a-Compiler.html">here</a>.</em></p>

<p>Hi, this blog isn’t dead! It was just, uh, resting. I’ve been swamped with non-blog things for the past few weeks but I’m back on track now, probably, I hope.</p>

<p>Today we’ll implement conditional statements and expressions. As usual, accompanying tests are <a href="https://github.com/nlsandler/write_a_c_compiler">here</a>.</p>

<h1 id="part-6-conditionals">Part 6: Conditionals</h1>

<p>In this post we’ll add support for two types of conditional constructs:</p>

<ol>
  <li>Conditional statements, a.k.a. <code class="language-plaintext highlighter-rouge">if</code> statements</li>
  <li>Ternary conditional expressions, which have the form <code class="language-plaintext highlighter-rouge">a ? b : c</code>. I’ll sometimes just call these “conditional expressions”.</li>
</ol>

<h3 id="if-statements">If Statements</h3>

<p>An <code class="language-plaintext highlighter-rouge">if</code> statement consists of a condition, a substatement that executes if the condition is true, and maybe another substatement that executes if the condition is false. Either of these substatements can be a single statement, like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
  <span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
</code></pre></div></div>

<p>or a compound statement, like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span> <span class="p">{</span>
  <span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
  <span class="k">return</span> <span class="n">a</span><span class="o">*</span><span class="mi">2</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<p>Adding support for compound statements is a distinct task that we’re not going to handle in this post. So for now, we’ll only support the first of the examples above, and not the second.</p>

<p>We say a condition is <strong>false</strong> if it evaluates to zero, and <strong>true</strong> otherwise, just like when we implemented boolean operators in earlier posts.</p>

<h4 id="else-if">Else If</h4>

<p>Note that C doesn’t have an explicit <code class="language-plaintext highlighter-rouge">else if</code> construct. If an <code class="language-plaintext highlighter-rouge">if</code> keyword immediately follows an <code class="language-plaintext highlighter-rouge">else</code> keyword, the whole <code class="language-plaintext highlighter-rouge">if</code> statement gets parsed as the <code class="language-plaintext highlighter-rouge">else</code> branch. In other words, the following code snippets are equivalent:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
    <span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
<span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">other_flag</span><span class="p">)</span>
    <span class="k">return</span> <span class="mi">1</span><span class="p">;</span>
<span class="k">else</span>
    <span class="k">return</span> <span class="mi">2</span><span class="p">;</span>
</code></pre></div></div>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
    <span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
<span class="k">else</span> <span class="p">{</span>
    <span class="k">if</span> <span class="p">(</span><span class="n">other_flag</span><span class="p">)</span>
        <span class="k">return</span> <span class="mi">1</span><span class="p">;</span>
    <span class="k">else</span>
        <span class="k">return</span> <span class="mi">2</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<h3 id="conditional-expressions">Conditional Expressions</h3>

<p>These expressions take the following form:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">a</span> <span class="o">?</span> <span class="n">b</span> <span class="o">:</span> <span class="n">c</span>
</code></pre></div></div>

<p>If <code class="language-plaintext highlighter-rouge">a</code> is true, the expression will evaluate to <code class="language-plaintext highlighter-rouge">b</code>; otherwise it will evaluate to <code class="language-plaintext highlighter-rouge">c</code>.</p>

<p>Note that we should only execute the expression we actually need. For example, in the following code snippet:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="mi">0</span> <span class="o">?</span> <span class="n">foo</span><span class="p">()</span> <span class="o">:</span> <span class="n">bar</span><span class="p">()</span>
</code></pre></div></div>

<p>the function <code class="language-plaintext highlighter-rouge">foo</code> should never be called. You might be tempted to call both <code class="language-plaintext highlighter-rouge">foo</code> and <code class="language-plaintext highlighter-rouge">bar</code>, then discard the result from <code class="language-plaintext highlighter-rouge">foo</code>, but that would be wrong; <code class="language-plaintext highlighter-rouge">foo</code> could print to the console, make a network call, or dereference a null pointer and crash the program. Obviously this point is also true of <code class="language-plaintext highlighter-rouge">if</code> statements – we should execute the <code class="language-plaintext highlighter-rouge">if</code> branch or the <code class="language-plaintext highlighter-rouge">else</code> branch but definitely not both.</p>

<p>Conditional expressions and <code class="language-plaintext highlighter-rouge">if</code> statements might seem very similar, but it’s important to remember that statements and expressions are used in totally different ways. For example, an expression has a value, but a statement doesn’t. So this is legal:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="n">flag</span> <span class="o">?</span> <span class="mi">2</span> <span class="o">:</span> <span class="mi">3</span><span class="p">;</span>
</code></pre></div></div>

<p>but this isn’t<sup id="anchor1"><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#fn1">1</a></sup>:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">//this is bogus</span>
<span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
            <span class="mi">2</span><span class="p">;</span>
        <span class="k">else</span>
            <span class="mi">3</span><span class="p">;</span>
</code></pre></div></div>

<p>On the other hand, a statement can contain other statements, but an expression can’t contain statements. For example, you can nest a <code class="language-plaintext highlighter-rouge">return</code> statement inside an <code class="language-plaintext highlighter-rouge">if</code> statement:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
    <span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
</code></pre></div></div>

<p>but you can’t have a <code class="language-plaintext highlighter-rouge">return</code> statement inside a conditional expression:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">//this is also bogus</span>
<span class="n">flag</span> <span class="o">?</span> <span class="k">return</span> <span class="mi">1</span> <span class="o">:</span> <span class="k">return</span> <span class="mi">2</span><span class="p">;</span>
</code></pre></div></div>

<h2 id="lexing">Lexing</h2>

<p>We need to define a few more tokens: <code class="language-plaintext highlighter-rouge">if</code> and <code class="language-plaintext highlighter-rouge">else</code> keywords for <code class="language-plaintext highlighter-rouge">if</code> statements, plus <code class="language-plaintext highlighter-rouge">:</code> and <code class="language-plaintext highlighter-rouge">?</code> operators for conditional expressions. Here’s the full list of tokens, with new tokens in bold at the bottom:</p>

<ul>
  <li>Open brace <code class="language-plaintext highlighter-rouge">{</code></li>
  <li>Close brace <code class="language-plaintext highlighter-rouge">}</code></li>
  <li>Open parenthesis <code class="language-plaintext highlighter-rouge">(</code></li>
  <li>Close parenthesis <code class="language-plaintext highlighter-rouge">)</code></li>
  <li>Semicolon <code class="language-plaintext highlighter-rouge">;</code></li>
  <li>Int keyword <code class="language-plaintext highlighter-rouge">int</code></li>
  <li>Return keyword <code class="language-plaintext highlighter-rouge">return</code></li>
  <li>Identifier <code class="language-plaintext highlighter-rouge">[a-zA-Z]\w*</code></li>
  <li>Integer literal <code class="language-plaintext highlighter-rouge">[0-9]+</code></li>
  <li>Minus <code class="language-plaintext highlighter-rouge">-</code></li>
  <li>Bitwise complement <code class="language-plaintext highlighter-rouge">~</code></li>
  <li>Logical negation <code class="language-plaintext highlighter-rouge">!</code></li>
  <li>Addition <code class="language-plaintext highlighter-rouge">+</code></li>
  <li>Multiplication <code class="language-plaintext highlighter-rouge">*</code></li>
  <li>Division <code class="language-plaintext highlighter-rouge">/</code></li>
  <li>AND <code class="language-plaintext highlighter-rouge">&amp;&amp;</code></li>
  <li>OR <code class="language-plaintext highlighter-rouge">||</code></li>
  <li>Equal <code class="language-plaintext highlighter-rouge">==</code></li>
  <li>Not Equal <code class="language-plaintext highlighter-rouge">!=</code></li>
  <li>Less than <code class="language-plaintext highlighter-rouge">&lt;</code></li>
  <li>Less than or equal <code class="language-plaintext highlighter-rouge">&lt;=</code></li>
  <li>Greater than <code class="language-plaintext highlighter-rouge">&gt;</code></li>
  <li>Greater than or equal <code class="language-plaintext highlighter-rouge">&gt;=</code></li>
  <li>Assignment <code class="language-plaintext highlighter-rouge">=</code></li>
  <li><strong>If keyword <code class="language-plaintext highlighter-rouge">if</code></strong></li>
  <li><strong>Else keyword <code class="language-plaintext highlighter-rouge">else</code></strong></li>
  <li><strong>Colon <code class="language-plaintext highlighter-rouge">:</code></strong></li>
  <li><strong>Question mark <code class="language-plaintext highlighter-rouge">?</code></strong></li>
</ul>

<h4 id="-task">☑ Task:</h4>
<p>Update the <em>lex</em> function to handle the new tokens. It should work for all stage 1-6 examples in the test suite, including the invalid ones.</p>

<h2 id="parsing">Parsing</h2>

<p>We’ll parse conditional expressions and <code class="language-plaintext highlighter-rouge">if</code> statements totally differently. Let’s handle <code class="language-plaintext highlighter-rouge">if</code> statements first.</p>

<h3 id="if-statements-1">If Statements</h3>

<p>So far, we’ve defined three types of statements in our AST: return statements, expressions, and variable declarations. Right now the definition looks like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>statement = Return(exp) 
          | Declare(string, exp option) //string is variable name
                                        //exp is optional initializer
          | Exp(exp)
</code></pre></div></div>

<p>We need to add an <code class="language-plaintext highlighter-rouge">If</code> statement, which has three parts: an expression (the controlling condition), an <code class="language-plaintext highlighter-rouge">if</code> branch and an optional <code class="language-plaintext highlighter-rouge">else</code> branch. Here’s our updated AST definition for statements:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>statement = Return(exp) 
          | Declare(string, exp option) //string is variable name
                                        //exp is optional initializer
          | Exp(exp)
          | If(exp, statement, statement option) //exp is controlling condition
                                                 //first statement is 'if' branch
                                                 //second statement is optional 'else' branch
</code></pre></div></div>

<p>Now let’s update our grammar. The rule for <code class="language-plaintext highlighter-rouge">if</code> statements consists of:</p>

<ul>
  <li>The <code class="language-plaintext highlighter-rouge">if</code> keyword</li>
  <li>An expression wrapped in parentheses (the condition)</li>
  <li>A statement (executed if the condition is true)</li>
  <li>Optionally, the <code class="language-plaintext highlighter-rouge">else</code> keyword, followed by another statement (executed if the condition is false)</li>
</ul>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>"if" "(" &lt;exp&gt; ")" &lt;statement&gt; [ "else" &lt;statement&gt; ]
</code></pre></div></div>

<p>So the updated grammar for statements looks like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;statement&gt; ::= "return" &lt;exp&gt; ";"
              | &lt;exp&gt; ";"
              | "int" &lt;id&gt; [ = &lt;exp&gt; ] ";"
              | "if" "(" &lt;exp&gt; ")" &lt;statement&gt; [ "else" &lt;statement&gt; ]
</code></pre></div></div>

<p>Our definition of statements is recursive! But it’s not left-recursive, so it’s not a problem.</p>

<p>But we have another problem. We defined variable declarations as a type of statement, but declarations in C <strong>aren’t statements</strong>. For example, this code snippet isn’t valid:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">//this will throw a compiler error!</span>
<span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span>
  <span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
</code></pre></div></div>

<p>When we added variable declarations in the last post, it didn’t matter whether or not we defined them as statements; we could parse the same subset of C and generate the same assembly either way. Now that we’re dealing with more complex structures like <code class="language-plaintext highlighter-rouge">if</code> statements, that simplification impacts what we can and can’t parse, so we need to fix it.</p>

<p>So we need to move <code class="language-plaintext highlighter-rouge">Declare</code> out of the <code class="language-plaintext highlighter-rouge">statement</code> type and into its own type. But this introduces a new problem: we’ve defined a function body as a list of statements, but if declarations aren’t statements, then you can’t have declarations in a function body. To fix this, we’ll need to tweak how we define functions in our AST. Let’s introduce some terminology:</p>

<ul>
  <li>A <strong>block item</strong> is a statement or declaration.</li>
  <li>A <strong>block</strong> or <strong>compound statement</strong> is a list of block items wrapped in curly braces<sup id="anchor2"><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#fn2">2</a></sup>.</li>
</ul>

<p>Function bodies are just a special case of blocks; they contain a list of declarations and statements. To represent them, we’ll introduce a new <code class="language-plaintext highlighter-rouge">block_item</code> type that can hold either a statement or a declaration. This will also come in handy when we add support for blocks in general in the next post. With those changes, the relevant parts of our AST will look like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>statement = Return(exp)                                         
          | Exp(exp)
          | Conditional(exp, statement, statement option) //exp is controlling condition
                                                          //first statement is 'if' block
                                                          //second statement is optional 'else' block

declaration = Declare(string, exp option) //string is variable name 
                                          //exp is optional initializer

block_item = Statement(statement) | Declaration(declaration)

function_declaration = Function(string, block_item list) //string is the function name                                                                                      
</code></pre></div></div>

<p>And here’s the updated grammar:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;statement&gt; ::= "return" &lt;exp&gt; ";"
              | &lt;exp&gt; ";"
              | "if" "(" &lt;exp&gt; ")" &lt;statement&gt; [ "else" &lt;statement&gt; ]
&lt;declaration&gt; ::= "int" &lt;id&gt; [ = &lt;exp&gt; ] ";"
&lt;block-item&gt; ::= &lt;statement&gt; | &lt;declaration&gt;
&lt;function&gt; ::= "int" &lt;id&gt; "(" ")" "{" { &lt;block-item&gt; } "}"
</code></pre></div></div>

<p>Now that we have our AST and grammar, you should be able to update your compiler to parse conditional statements. You may want to do that before we move on to conditional expressions.</p>

<h4 id="-task-1">☑ Task:</h4>
<p>Update the parsing pass to handle conditional statements. It should successfully parse all valid stage 6 examples in <code class="language-plaintext highlighter-rouge">write_a_c_compiler/stage_6/valid/statement</code>, and throw an error for all invalid stage 6 examples in <code class="language-plaintext highlighter-rouge">write_a_c_compiler/stage_6/invalid/statement</code>.</p>

<h3 id="conditional-expressions-1">Conditional Expressions</h3>

<p>Now let’s add ternary conditional expressions. Here’s how we’ve defined our AST for expressions so far:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>exp = Assign(string, exp)
    | Var(string) //string is variable name
    | BinOp(binary_operator, exp, exp)
    | UnOp(unary_operator, exp)
    | Constant(int)
</code></pre></div></div>

<p>It’s straightforward to add a <code class="language-plaintext highlighter-rouge">Conditional</code> form:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>exp = Assign(string, exp)
    | Var(string) //string is variable name
    | BinOp(binary_operator, exp, exp)
    | UnOp(unary_operator, exp)
    | Constant(int)
    | Conditional(exp, exp, exp) //the three expressions are the condition, 'if' expression and 'else' expression, respectively
</code></pre></div></div>

<p>We also need to update the grammar rules for expressions, which currently look like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;exp&gt; ::= &lt;id&gt; "=" &lt;exp&gt; | &lt;logical-or-exp&gt;
&lt;logical-or-exp&gt; ::= &lt;logical-and-exp&gt; { "||" &lt;logical-and-exp&gt; } 
...more rules...
</code></pre></div></div>

<p>The conditional operator has lower precedence than assignment (<code class="language-plaintext highlighter-rouge">=</code>) but higher precedence than logical OR (<code class="language-plaintext highlighter-rouge">||</code>), and it’s right-associative. We can take its grammar rule straight from section 6.5.15 of the <a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf">C11 standard</a>:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;conditional-exp&gt; ::= &lt;logical-or-exp&gt; "?" &lt;exp&gt; ":" &lt;conditional-exp&gt;
</code></pre></div></div>

<p>Let’s think about why it’s defined this way. I’ll refer to the three sub-expressions as <strong>e1</strong>, <strong>e2</strong>, and <strong>e3</strong>, such that a conditional expression has the form <code class="language-plaintext highlighter-rouge">e1 ? e2 : e3</code>. Expression <strong>e1</strong> has to be a <code class="language-plaintext highlighter-rouge">&lt;logical-or-exp&gt;</code> because it can’t be an assignment expression or a conditional expression. It can’t be an assignment expression because assignment has lower precedence than the conditional operator. In other words:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">a</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">?</span> <span class="mi">2</span> <span class="o">:</span> <span class="mi">3</span><span class="p">;</span>
</code></pre></div></div>

<p>must be parsed as:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">a</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">?</span> <span class="mi">2</span> <span class="o">:</span> <span class="mi">3</span><span class="p">);</span>
</code></pre></div></div>

<p>In our current grammar this is specified unambiguously, but if we instead defined a conditional expression as:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;conditional-exp&gt; ::= &lt;exp&gt; "?" &lt;exp&gt; ":" &lt;conditional-exp&gt;
</code></pre></div></div>

<p>then it would be ambiguous; the statement above could also be parsed as:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">(</span><span class="n">a</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">?</span> <span class="mi">2</span> <span class="o">:</span> <span class="mi">3</span><span class="p">;</span>
</code></pre></div></div>

<p>Note that <code class="language-plaintext highlighter-rouge">(a = 1) ? 2 : 3;</code> is a valid statement, but you need the parentheses in order to parse it that way.</p>

<p>So that’s why <strong>e1</strong> can’t be an assignment expression. It can’t be a conditional expression because <code class="language-plaintext highlighter-rouge">?</code> is right-associative. In other words:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">flag1</span> <span class="o">?</span> <span class="mi">4</span> <span class="o">:</span> <span class="n">flag2</span> <span class="o">?</span> <span class="mi">6</span> <span class="o">:</span> <span class="mi">7</span>
</code></pre></div></div>

<p>must be parsed as</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">flag1</span> <span class="o">?</span> <span class="mi">4</span> <span class="o">:</span> <span class="p">(</span><span class="n">flag2</span> <span class="o">?</span> <span class="mi">6</span> <span class="o">:</span> <span class="mi">7</span><span class="p">)</span>
</code></pre></div></div>

<p>If we had defined a conditional expression as:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;conditional-exp&gt; ::= &lt;conditional-exp&gt; "?" &lt;exp&gt; ":" &lt;conditional-exp&gt;
</code></pre></div></div>

<p>then the example above could also be parsed as:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">(</span><span class="n">flag1</span> <span class="o">?</span> <span class="mi">4</span> <span class="o">:</span> <span class="n">flag2</span><span class="p">)</span> <span class="o">?</span> <span class="mi">6</span> <span class="o">:</span> <span class="mi">7</span>
</code></pre></div></div>

<p>and the grammar would be ambiguous.</p>

<p>Expression <strong>e2</strong> in our ternary conditional can take any form; safely fenced in by <code class="language-plaintext highlighter-rouge">?</code> and <code class="language-plaintext highlighter-rouge">:</code>, it can’t introduce any grammatical ambiguity. You can think of implicit parentheses wrapping everything between <code class="language-plaintext highlighter-rouge">?</code> and <code class="language-plaintext highlighter-rouge">:</code>.</p>

<p>Expression <strong>e3</strong> can be another ternary conditional, as in the example <code class="language-plaintext highlighter-rouge">a &gt; b ? 4 : flag ? 6 : 7</code>. But it <em>can’t</em> be an assignment statement – why not? Let’s look at the following example:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">flag</span> <span class="o">?</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">:</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">0</span>
</code></pre></div></div>

<p>If we try to compile this with gcc, we’ll get something like the following error message:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>error: expression is not assignable
    flag ? a = 1 : a = 0;
    ~~~~~~~~~~~~~~~~ ^
</code></pre></div></div>

<p>In other words, gcc tried to parse the expression like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">(</span><span class="n">flag</span> <span class="o">?</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">:</span> <span class="n">a</span><span class="p">)</span> <span class="o">=</span> <span class="mi">0</span>
</code></pre></div></div>

<p>This obviously doesn’t work because the expression on the left isn’t a variable<sup id="anchor3"><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#fn3">3</a></sup>. You might wonder why we can’t use the following grammar rule:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;conditional-exp&gt; ::= &lt;logical-or-exp&gt; "?" &lt;exp&gt; ":" &lt;exp&gt;
</code></pre></div></div>

<p>Then gcc could just parse it like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">flag</span> <span class="o">?</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">:</span> <span class="p">(</span><span class="n">a</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span>
</code></pre></div></div>

<p>That grammar rule would work fine; in fact, that’s how conditional expressions are defined in C++<sup id="anchor4"><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#fn4">4</a></sup>. I don’t know why it’s different in C, but if <em>you</em> know I’d like to hear from you.</p>

<p>We also need a way to specify expressions that aren’t conditionals, so we’ll make the ‘conditional’ part of this grammar rule optional<sup id="anchor5"><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#fn5">5</a></sup>:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;conditional-exp&gt; ::= &lt;logical-or-exp&gt; [ "?" &lt;exp&gt; ":" &lt;conditional-exp&gt; ]
</code></pre></div></div>

<p>Anyway, we now know the correct grammar. Here are all the new and updated grammar rules concerning expressions:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;exp&gt; ::= &lt;id&gt; "=" &lt;exp&gt; | &lt;conditional-exp&gt;
&lt;conditional-exp&gt; ::= &lt;logical-or-exp&gt; [ "?" &lt;exp&gt; ":" &lt;conditional-exp&gt; ]
&lt;logical-or-exp&gt; ::= &lt;logical-and-exp&gt; { "||" &lt;logical-and-exp&gt; } 
...
</code></pre></div></div>

<h4 id="-task-2">☑ Task:</h4>
<p>Update the parsing pass to handle ternary conditional expressions. At this point, it should successfully parse all valid stage 6 examples, and throw an error for all invalid examples.</p>

<h3 id="put-it-all-together">Put It All Together</h3>
<p>For the sake of completeness, here’s our full AST definition and grammar, with new and changed parts bolded:</p>

<p>AST:</p>
<pre>program = Program(function_declaration)

<b>function_declaration = Function(string, block_item list) //string is the function name

block_item = Statement(statement) | Declaration(declaration)

declaration = Declare(string, exp option) //string is variable name 
                                          //exp is optional initializer</b>

statement = Return(exp) 
          | Exp(exp)
<b>          | Conditional(exp, statement, statement option) //exp is controlling condition
                                                          //first statement is 'if' block
                                                          //second statement is optional 'else' block
</b>                                                          
exp = Assign(string, exp)
    | Var(string) //string is variable name
    | BinOp(binary_operator, exp, exp)
    | UnOp(unary_operator, exp)
    | Constant(int)
<b>    | CondExp(exp, exp, exp) //the three expressions are the condition, 'if' expression and 'else' expression, respectively</b>
</pre>

<p>Grammar:</p>
<pre>&lt;program&gt; ::= &lt;function&gt;
<b>&lt;function&gt; ::= "int" &lt;id&gt; "(" ")" "{" { &lt;block-item&gt; } "}"
&lt;block-item&gt; ::= &lt;statement&gt; | &lt;declaration&gt;
&lt;declaration&gt; ::= "int" &lt;id&gt; [ = &lt;exp&gt; ] ";"</b>
&lt;statement&gt; ::= "return" &lt;exp&gt; ";"
              | &lt;exp&gt; ";"
<b>              | "if" "(" &lt;exp&gt; ")" &lt;statement&gt; [ "else" &lt;statement&gt; ]</b>
<br>
<b>&lt;exp&gt; ::= &lt;id&gt; "=" &lt;exp&gt; | &lt;conditional-exp&gt;
&lt;conditional-exp&gt; ::= &lt;logical-or-exp&gt; [ "?" &lt;exp&gt; ":" &lt;conditional-exp&gt; ]</b>
&lt;logical-or-exp&gt; ::= &lt;logical-and-exp&gt; { "||" &lt;logical-and-exp&gt; }
&lt;logical-and-exp&gt; ::= &lt;equality-exp&gt; { "&amp;&amp;" &lt;equality-exp&gt; }
&lt;equality-exp&gt; ::= &lt;relational-exp&gt; { ("!=" | "==") &lt;relational-exp&gt; }
&lt;relational-exp&gt; ::= &lt;additive-exp&gt; { ("&lt;" | "&gt;" | "&lt;=" | "&gt;=") &lt;additive-exp&gt; }
&lt;additive-exp&gt; ::= &lt;term&gt; { ("+" | "-") &lt;term&gt; }
&lt;term&gt; ::= &lt;factor&gt; { ("*" | "/") &lt;factor&gt; }
&lt;factor&gt; ::= "(" &lt;exp&gt; ")" | &lt;unary_op&gt; &lt;factor&gt; | &lt;int&gt; | &lt;id&gt;
&lt;unary_op&gt; ::= "!" | "~" | "-"
</pre>

<h2 id="code-generation">Code Generation</h2>

<p>To generate the assembly for <code class="language-plaintext highlighter-rouge">if</code> statements and conditional expressions, we’re going to need conditional and unconditional jumps, which we introduced in <a href="https://norasandler.com/2017/12/28/Write-a-Compiler-4.html">part 4</a>. We can generate assembly for the conditional expression <code class="language-plaintext highlighter-rouge">e1 ? e2 : e3</code> as follows:</p>

<div class="language-nasm highlighter-rouge"><div class="highlight"><pre class="highlight"><code>    <span class="err">&lt;</span><span class="nf">CODE</span> <span class="nv">FOR</span> <span class="nv">e1</span> <span class="nv">GOES</span> <span class="nv">HERE</span><span class="o">&gt;</span>
    <span class="nf">cmpl</span> <span class="kc">$</span><span class="mi">0</span><span class="p">,</span> <span class="o">%</span><span class="nb">eax</span>
    <span class="nf">je</span>   <span class="nv">_e3</span>                  <span class="c1">; if e1 == 0, e1 is false so execute e3</span>
    <span class="err">&lt;</span><span class="nf">CODE</span> <span class="nv">FOR</span> <span class="nv">e2</span> <span class="nv">GOES</span> <span class="nv">HERE</span><span class="o">&gt;</span>  <span class="c1">; we're still here so e1 must be true. execute e2.</span>
    <span class="nf">jmp</span>  <span class="nv">_post_conditional</span>    <span class="c1">; jump over e3</span>
<span class="nl">_e3:</span>
    <span class="err">&lt;</span><span class="nf">CODE</span> <span class="nv">FOR</span> <span class="nv">e3</span> <span class="nv">GOES</span> <span class="nv">HERE</span><span class="o">&gt;</span>  <span class="c1">; we jumped here because e1 was false. execute e3.</span>
<span class="nl">_post_conditional:</span>            <span class="c1">; we need this label to jump over e3</span>
</code></pre></div></div>

<p>The assembly for <code class="language-plaintext highlighter-rouge">if</code> statements is quite similar, although it’s slightly complicated by the optional <code class="language-plaintext highlighter-rouge">else</code> clause. I’ll let you figure it out yourself.</p>

<p>As in the assembly for <code class="language-plaintext highlighter-rouge">&amp;&amp;</code> and <code class="language-plaintext highlighter-rouge">||</code> we saw earlier, labels have to be unique.</p>

<h4 id="-task-3">☑ Task:</h4>
<p>Update the code-generation pass to correctly handle ternary conditional expressions and <code class="language-plaintext highlighter-rouge">if</code> statements. It should success on all valid examples and fail on all invalid examples for stages 1-6.</p>

<h2 id="up-next">Up Next</h2>
<p>In the <a href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html">next post</a>, we’ll add compound statements, so brace yourself (pun intended) for an exciting discussion of lexical scope! I <strong>hope</strong> that will be two weeks from now and not two months. See you then!</p>

<p><em>If you have any questions, corrections, or other feedback, you can <a href="mailto:nora@norasandler.com">email me</a> or <a href="https://github.com/nlsandler/write_a_c_compiler/issues">open an issue</a>.</em></p>

<div class="footnote">
  <p><sup id="fn1">1</sup>
But the <code class="language-plaintext highlighter-rouge">if</code> construct in many functional languages <em>is</em> an expression, and works just like C’s ternary conditionals. This is valid OCaml, for instance:</p>
  <div class="language-ocaml highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">let</span> <span class="n">a</span> <span class="o">=</span> <span class="k">if</span> <span class="n">b</span> <span class="k">then</span> <span class="mi">1</span> <span class="k">else</span> <span class="mi">2</span>
</code></pre></div>  </div>
  <p><a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#anchor1">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn2">2</sup>
The terms “block” and “compound statement” aren’t 100% synonymous; compound statements are a subset of blocks. But the terms are similar enough that it’s fine to treat them as synonyms for now. <a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#anchor2">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn3">3</sup>
Actually, any “modifiable lvalue” is allowed on the left side of an assignment statement, not just variables. <code class="language-plaintext highlighter-rouge">*x</code>, <code class="language-plaintext highlighter-rouge">&amp;x</code>, <code class="language-plaintext highlighter-rouge">++x</code>, and <code class="language-plaintext highlighter-rouge">x++</code> are all examples of modifiable lvalues. Conditional expressions aren’t, though. <a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#anchor3">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn4">4</sup>
See <a href="https://stackoverflow.com/a/26448707">this Stack Overflow answer</a> and the <a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf">C++11 standard</a>. <a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#anchor4">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn5">5</sup>
Thanks to Stephen Bastians for <a href="https://github.com/nlsandler/write_a_c_compiler/issues/4">pointing out a mistake in this grammar rule</a> in an earlier verson of this post.<a href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html#anchor5">↩</a></p>
</div>

  </div><a class="u-url" href="https://norasandler.com/2018/02/25/Write-a-Compiler-6.html" hidden=""></a>
</article>

      </div>
    </main>

    <footer class="site-footer">

  <div class="wrapper">
      <div class="footer-col-wrapper">
        <div class="footer-col footer-col-1">
            <div class="rc-scout" data-scout-rendered="true"><p class="rc-scout__text"><i class="rc-scout__logo"></i> Want to become a better programmer? <a class="rc-scout__link" href="https://www.recurse.com/scout/click?t=8f520efbc4be09fb83a71920f53a07b7">Join the Recurse Center!</a></p></div><script async="" defer="" src="./Writing a C Compiler, Part 6_files/loader.js"></script>
        </div>
      </div>
    <div class="footer-col-wrapper">
      <div class="footer-col footer-col-1">
        © 2023 Nora Sandler.
      </div>
    </div>
  </div>

</footer>


  


<script async="" src="./Writing a C Compiler, Part 6_files/scout-176be16681a03cbddd686f3cc96694d3aab338ea5cb65452f83d989309810528.js"></script><style class="rc-scout__style" type="text/css">.rc-scout {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
}
.rc-scout__text {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
  height: 100%;
  font-size: 100%;
}
.rc-scout__logo {
  display: inline-block;
  padding: 0;
  border: 0;
  margin: 0;
  width: 0.85em;
  height: 0.85em;
  background: no-repeat center url('data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%2012%2015%22%3E%3Crect%20x%3D%220%22%20y%3D%220%22%20width%3D%2212%22%20height%3D%2210%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%221%22%20width%3D%2210%22%20height%3D%228%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%222%22%20width%3D%228%22%20height%3D%226%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%229%22%20width%3D%224%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%2211%22%20width%3D%2210%22%20height%3D%224%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%220%22%20y%3D%2212%22%20width%3D%2212%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%225%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%227%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%228%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%229%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3C%2Fsvg%3E');
}
.rc-scout__link:link, .rc-scout__link:visited {
  color: #3dc06c;
  text-decoration: underline;
}
.rc-scout__link:hover, .rc-scout__link:active {
  color: #4e8b1d;
}
</style></body></html>