summaryrefslogtreecommitdiff
path: root/miniany/doc/Writing a C Compiler, Part 7.html
blob: 0e621ad811e4fbe309baf54d780892e3a01f932f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
<!DOCTYPE html>
<!-- saved from url=(0058)https://norasandler.com/2018/03/14/Write-a-Compiler-7.html -->
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <title>Writing a C Compiler, Part 7</title>
  <meta name="description" content="Update 4/9">

  <link rel="stylesheet" href="./Writing a C Compiler, Part 7_files/main.css">
  <link rel="canonical" href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html">
  <link rel="alternate" type="application/rss+xml" title="Nora Sandler" href="https://norasandler.com/feed.xml">
  
</head>


  <body>

    <header class="site-header" role="banner">

  <div class="wrapper">
    
    
    <a class="site-title" href="https://norasandler.com/">Nora Sandler</a>
  
    
      <nav class="site-nav">
        <input type="checkbox" id="nav-trigger" class="nav-trigger">
        <label for="nav-trigger">
          <span class="menu-icon">
            <svg viewBox="0 0 18 15" width="18px" height="15px">
              <path fill="#424242" d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.031C17.335,0,18,0.665,18,1.484L18,1.484z"></path>
              <path fill="#424242" d="M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0c0-0.82,0.665-1.484,1.484-1.484 h15.031C17.335,6.031,18,6.696,18,7.516L18,7.516z"></path>
              <path fill="#424242" d="M18,13.516C18,14.335,17.335,15,16.516,15H1.484C0.665,15,0,14.335,0,13.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.031C17.335,12.031,18,12.696,18,13.516L18,13.516z"></path>
            </svg>
          </span>
        </label>

        <div class="trigger">
          
            
            
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/about/">About</a>
              
            
          
            
            
              
              <a class="page-link" href="https://norasandler.com/archive/">Archive</a>
              
            
          
            
            
          
            
            
              
            
          
            
            
              
            
          
            
            
              
            
          
          <a class="page-link" href="https://github.com/nlsandler">Github</a>
          <a href="https://norasandler.com/feed.xml"><img id="rss" height="20" width="20" src="./Writing a C Compiler, Part 7_files/rss.png"></a>

        </div>
      </nav>
    
  </div>
</header>


    <main class="page-content" aria-label="Content">
      <div class="wrapper">
        <article class="post h-entry" itemscope="" itemtype="http://schema.org/BlogPosting">

  <header class="post-header">
    <h1 class="post-title p-name" itemprop="name headline">Writing a C Compiler, Part 7</h1>
    <p class="post-meta">
      <time class="dt-published" datetime="2018-03-14T23:00:00+00:00" itemprop="datePublished">Mar 14, 2018
      </time></p>
  </header>

  <div class="post-content e-content" itemprop="articleBody">
    <h2 id="update-49">Update 4/9</h2>

<ul>
  <li>There was a pretty big mistake in the original post - I forgot to deallocate local variables! I’ve added the “Deallocating Variables” section, and added the example from that section to the test suite.</li>
</ul>

<p><em>This is the seventh post in a series. Read part 1 <a href="https://norasandler.com/2017/11/29/Write-a-Compiler.html">here</a>.</em></p>

<p>In this post we’re adding support for compound statements, which are a little weird because they don’t <em>do</em> very much.
We’ll generate almost no new assembly in this post, but we’ll be able to compile new and exciting programs at the end of it.
How is this possible? Let’s find out!</p>

<p>As usual, accompanying tests are <a href="https://github.com/nlsandler/write_a_c_compiler">here</a>.</p>

<h1 id="part-7-compound-statements">Part 7: Compound Statements</h1>

<p>A compound statement is just a list of statements and declarations wrapped in curly braces.
They’re normally used as substatements of <code class="language-plaintext highlighter-rouge">if</code>, <code class="language-plaintext highlighter-rouge">while</code>, and other control structures, like this<sup id="anchor1"><a href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html#fn1">1</a></sup>:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">if</span> <span class="p">(</span><span class="n">flag</span><span class="p">)</span> <span class="p">{</span>
    <span class="c1">//this is a compound statement!</span>
    <span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<p>but they can also be free-standing, like this:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="kt">int</span> <span class="n">a</span><span class="p">;</span>
    <span class="p">{</span>
        <span class="c1">//this is also a compound statement!</span>
        <span class="n">a</span> <span class="o">=</span> <span class="mi">4</span><span class="p">;</span>
    <span class="p">}</span>
<span class="p">}</span>
</code></pre></div></div>

<p>You can have deeply nested compound statements:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
    <span class="c1">//compound statement #1 (function bodies are compound statements!)</span>
    <span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
    <span class="p">{</span>
        <span class="c1">//compound statement #2</span>
        <span class="n">a</span> <span class="o">=</span> <span class="mi">2</span><span class="p">;</span>
        <span class="p">{</span>
            <span class="c1">//compound statement #3</span>
            <span class="n">a</span> <span class="o">=</span> <span class="mi">3</span><span class="p">;</span>
            <span class="k">if</span> <span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="p">{</span>
                <span class="c1">//compound statement #4</span>
                <span class="n">a</span> <span class="o">=</span> <span class="mi">4</span><span class="p">;</span>
            <span class="p">}</span>
        <span class="p">}</span>
    <span class="p">}</span>
<span class="p">}</span>
</code></pre></div></div>

<p>Like I mentioned in the last post, a compound statement is one type of <strong>block</strong>, and I’m going to use the terms synonymously for the rest of this post. C uses <strong>lexical scoping</strong>; a variable’s scope is dictated by the block where it’s defined. (By “scope”, I mean where in the program you’re allowed to refer to it.) More precisely, a variable’s scope starts at its definition, and ends when you exit the block where it’s defined<sup id="anchor2"><a href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html#fn2">2</a></sup>. Up until this point in the series, function bodies were the only blocks around, so a variable could be used at any point in <code class="language-plaintext highlighter-rouge">main</code> after it was defined. Now it’s more complicated. I’m going to talk a bit about how scoping works in C; if you’re already familiar with this, you can skip ahead to the next section.</p>

<p>If a variable is defined in an inner scope, it can’t be accessed in an outer scope:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">// here is the outer scope</span>
<span class="p">{</span>
    <span class="c1">// here is the inner scope</span>
    <span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">2</span><span class="p">;</span>
<span class="p">}</span>

<span class="c1">// now we're back in the outer scope</span>
<span class="n">foo</span> <span class="o">=</span> <span class="mi">3</span><span class="p">;</span> <span class="c1">// ERROR - foo isn't defined in this scope!</span>
</code></pre></div></div>

<p>However, code in an inner scope can access variables in an outer scope:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">2</span><span class="p">;</span>
<span class="p">{</span>
    <span class="n">a</span> <span class="o">=</span> <span class="mi">4</span><span class="p">;</span> <span class="c1">// this is okay</span>
<span class="p">}</span>
<span class="k">return</span> <span class="n">a</span><span class="p">;</span> <span class="c1">// returns 4 - changes made inside the inner scope are reflected here</span>
</code></pre></div></div>

<p>You can’t have two variables with the same name in the same scope:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
<span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span> <span class="c1">//This will throw a compiler error</span>
</code></pre></div></div>

<p>But you can have two variables with the same name in <em>different</em> scopes. Once the variable in the inner scope is declared, it will shadow the variable from the outer scope; the outer variable will be inaccessible until the inner variable goes out of scope.</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
<span class="p">{</span>
    <span class="kt">int</span> <span class="n">foo</span><span class="p">;</span> <span class="c1">// this is a TOTALLY DIFFERENT foo, unrelated to foo from earlier</span>
    <span class="n">foo</span> <span class="o">=</span> <span class="mi">2</span><span class="p">;</span> <span class="c1">// this refers to the inner foo; outer foo is inaccessible</span>
<span class="p">}</span>
<span class="k">return</span> <span class="n">foo</span><span class="p">;</span> <span class="c1">//this will return 0 - it refers to the original foo, which is unchanged</span>
</code></pre></div></div>

<p>The key idea here is that the inner and outer <code class="language-plaintext highlighter-rouge">foo</code> variables are two totally unrelated variables that just happen to have the same name. When we’re in the inner block, the outer variable <code class="language-plaintext highlighter-rouge">foo</code> still exists, but we have no way to refer to it, because <code class="language-plaintext highlighter-rouge">foo</code> now refers to the inner variable.</p>

<p>Note, however, that outer <code class="language-plaintext highlighter-rouge">foo</code> is accessible in the inner block before the point where it’s shadowed:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
<span class="p">{</span>
    <span class="n">foo</span> <span class="o">=</span> <span class="mi">3</span><span class="p">;</span> <span class="c1">//changes outer foo</span>
    <span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">4</span><span class="p">;</span> <span class="c1">//defines inner foo, shadowing outer foo</span>
<span class="p">}</span>
<span class="k">return</span> <span class="n">foo</span><span class="p">;</span> <span class="c1">//returns 3</span>
</code></pre></div></div>

<h2 id="lexing">Lexing</h2>

<p>Compound statements don’t require any new tokens, so we don’t need to touch the lexing pass this week.</p>

<h2 id="parsing">Parsing</h2>

<p>Here’s the current definition of statements in our AST:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>statement = Return(exp) 
          | Exp(exp)
          | Conditional(exp, statement, statement option) //exp is controlling condition
                                                          //first statement is 'if' block
                                                          //second statement is optional 'else' block
</code></pre></div></div>

<p>We just need to add a <code class="language-plaintext highlighter-rouge">Compound</code> statement to this definition.
Also recall that we added a <code class="language-plaintext highlighter-rouge">block_item</code> construct to the AST in our last post:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>block_item = Statement(statement) | Declaration(declaration)
</code></pre></div></div>

<p>A compound statement is just a list of statements and declarations, so our new definition of statements will look like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>statement = Return(exp) 
          | Exp(exp)
          | Conditional(exp, statement, statement option) //exp is controlling condition
                                                          //first statement is 'if' block
                                                          //second statement is optional 'else' block
          | Compound(block_item list)
</code></pre></div></div>

<p>We’ll parse conditional expressions and conditional statements totally differently. Statements are easier, so let’s handle those first.</p>

<p>Now let’s update our grammar. The rule for blocks is extremely simple:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>"{" { &lt;block-item&gt; } "}
</code></pre></div></div>

<p>Note that <code class="language-plaintext highlighter-rouge">"{" "}"</code> are literal curly braces, and <code class="language-plaintext highlighter-rouge">{ }</code> indicates repetition. This is hard to read! But it just means we have an arbitrary number of block items wrapped in braces – if you refer back to the grammar for <code class="language-plaintext highlighter-rouge">&lt;function&gt;</code> you can see that we define function bodies exactly the same way.</p>

<p>Putting it all together, our updated grammar looks like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>&lt;statement&gt; ::= "return" &lt;exp&gt; ";"
              | &lt;exp&gt; ";"
              | "if" "(" &lt;exp&gt; ")" &lt;statement&gt; [ "else" &lt;statement&gt; ]
              | "{" { &lt;block-item&gt; } "}
</code></pre></div></div>

<h4 id="-task">☑ Task:</h4>
<p>Update the parsing pass to handle blocks. It should successfully parse all valid examples in stage 1-7. As in part 5, some invalid examples should fail during parsing and some should fail during code generation. At this point, your parsing pass should throw an appropriate error for all invalid stage 7 examples whose names start with <code class="language-plaintext highlighter-rouge">syntax_err</code>.</p>

<h2 id="code-generation">Code Generation</h2>

<p>As we saw earlier, it’s possible to have two different variables, in two different scopes, stored at two different locations on the stack, with the same name. Here’s an example:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">3</span><span class="p">;</span>
<span class="p">{</span>
  <span class="kt">int</span> <span class="n">foo</span> <span class="o">=</span> <span class="mi">4</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<p>So, whenever the program refers to variable <code class="language-plaintext highlighter-rouge">foo</code>, our generated code needs to access the correct <code class="language-plaintext highlighter-rouge">foo</code> on the stack – or raise an error if <code class="language-plaintext highlighter-rouge">foo</code> has gone out of scope. The code generation step this week is all about managing the variable map so we always look up the right <code class="language-plaintext highlighter-rouge">foo</code>.</p>

<p>The trick here is that <strong>every block has a separate copy of the variable map</strong>. That way, defining (or redefining) a variable in an inner scope won’t interfere with an outer scope. And if you’re using an immutable map (which you should be), every block will necessarily get its own variable map, so this approach is surprisingly easy.</p>

<p>Let’s look at some pseudocode. After Part 5, your code to generate a function body probably looked something like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>def generate_function_body(body):
  // initialize variable map and stack index
  var_map = Map()
  stack_index = -4

  //process statements one at a time
  for statement in body:
    var_map, stack_index = generate_statement(statement, var_map, stack_index) 
</code></pre></div></div>

<p>Note that <code class="language-plaintext highlighter-rouge">generate_statement</code> has to return a new <code class="language-plaintext highlighter-rouge">var_map</code>. Every declaration updates the variable map (or, more precisely, creates a new variable map), and in part 5 <code class="language-plaintext highlighter-rouge">generate_statement</code> also handled declarations. Whenever we process a declaration, we need to return the latest, greatest variable map so future statements can reference the variable we just declared.</p>

<p>But in the last post, we separated statements from declarations in our AST, so you might have changed the last line to:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>    var_map, stack_index = generate_statement_or_declaration(statement, var_map, stack_index) 
</code></pre></div></div>

<p>At this point, a declaration will create a new variable map, but a statement won’t. Whatever happens in a statement – including a compound statement, which may itself contain declarations – has no impact on the variable map for the enclosing scope. Once you understand that point, handling nested scopes is easy:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>def generate_function_body(body):
  // initialize variable map and stack index
  var_map = Map()
  stack_index = -4

  //process statements one at a time
  for block_item in body:
    if block_item is a declaration:
        //update the variable map
        var_map, stack_index = generate_declaration(statement, var_map, stack_index)
    else:
        //don't update the variable map
        generate_statement(statement, var_map, stack_index)
</code></pre></div></div>

<p>Of course you’ll need to generalize <code class="language-plaintext highlighter-rouge">generate_function_body</code> into <code class="language-plaintext highlighter-rouge">generate_block</code>; the one difference between generating a function body and any other block is that you need to initialize your empty variable map and stack index at the start of the function body.</p>

<p>Now let’s walk through a small example to see how this maintains the right variable maps for different scopes:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">main</span><span class="p">(){</span>
    <span class="c1">// 1) function body</span>
    <span class="p">{</span>   <span class="c1">// 2) block</span>
        <span class="kt">int</span> <span class="n">a</span> <span class="o">=</span> <span class="mi">2</span><span class="p">;</span> <span class="c1">// 3) variable declaration</span>
        <span class="n">a</span> <span class="o">=</span> <span class="mi">3</span><span class="p">;</span> <span class="c1">// 4) variable reference</span>
    <span class="p">}</span>
    <span class="k">return</span> <span class="n">a</span><span class="p">;</span> <span class="c1">// 5) return statement</span>
<span class="p">}</span>
</code></pre></div></div>

<ol>
  <li>We’ll process the function body with <code class="language-plaintext highlighter-rouge">generate_block</code>. Right now we’ve got an empty variable map.</li>
  <li>We call <code class="language-plaintext highlighter-rouge">generate_block</code> recursively to process the inner block. The variable map is still empty.</li>
  <li>This is a declaration, so we add <code class="language-plaintext highlighter-rouge">a</code> to the variable map (technically, we create a copy of the variable map that contains <code class="language-plaintext highlighter-rouge">a</code>, because all these maps are immutable).</li>
  <li>We look up <code class="language-plaintext highlighter-rouge">a</code>’s location on the stack in the variable map from step 3.</li>
  <li>Back in the outer scope, <code class="language-plaintext highlighter-rouge">var_map</code> refers to the original, <em>empty</em> variable map. Since <code class="language-plaintext highlighter-rouge">a</code> isn’t defined in this map, this will throw an error, as it should.</li>
</ol>

<p>The code for handling declarations also needs to be changed. The pseudocode for processing declarations from part 5 included this line:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>if var_map.contains("a"):
  fail() //shouldn't declare a var twice
</code></pre></div></div>

<p>This is now incorrect; it’s legal to declare two variables with the same name, as long as the declarations aren’t in the same scope. To solve this, we need a way to distinguish between variables defined in the current scope, and variables defined in an outer scope. My solution was to maintain a set of variables that are defined in the current scope, which means <code class="language-plaintext highlighter-rouge">generate_block</code> now looks something like this:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>def generate_block(block, var_map, stack_index):

  current_scope = Set()

  //process statements one at a time
  for block_item in block:
    if block_item is a declaration:
        //update the variable map
        var_map, stack_index, current_scope = generate_declaration(statement, var_map, stack_index, current_scope)
    else:
        //don't update the variable map
        generate_statement(statement, var_map, stack_index)
</code></pre></div></div>

<p>Finally, we check <code class="language-plaintext highlighter-rouge">current_scope</code>, rather than <code class="language-plaintext highlighter-rouge">var_map</code>, for duplicate variable declarations, and add the variable to both structures on success:</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>if current_scope.contains("a"):
  fail() //shouldn't declare a var twice in the same scope
else:
  //emit assembly, update stack_index and var_map as before...
  new_scope = current_scope.add("a")
  return (var_map, stack_index, current_scope)
</code></pre></div></div>

<p>This solution feels hacky, but I haven’t come up with a better one.</p>

<p>Now, if <code class="language-plaintext highlighter-rouge">a</code> is redefined in an inner scope, it just overwrites the old <code class="language-plaintext highlighter-rouge">a</code> in the variable map, so this scope and any inner ones will use the correct stack location, corresponding to the innermost definition of <code class="language-plaintext highlighter-rouge">a</code>. This won’t affect the outer scope at all, because the outer scope is still using the original, unmodified variable map.</p>

<h3 id="deallocating-variables">Deallocating Variables</h3>

<p>We’ve carefully managed our variable map to prevent a block from interfering with any variable declarations in its enclosing scope. But there’s one side effect we couldn’t avoid: allocating a variable changes the stack pointer. This is a problem, because the stack pointer and our <code class="language-plaintext highlighter-rouge">stack_index</code> variable will get out of sync. Consider the following example:</p>

<div class="language-c highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kt">int</span> <span class="nf">main</span><span class="p">()</span> <span class="p">{</span>
  <span class="p">{</span>
    <span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
  <span class="p">}</span>
  <span class="kt">int</span> <span class="n">j</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
  <span class="k">return</span> <span class="n">j</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div></div>

<p>At first, the variable map is empty and <code class="language-plaintext highlighter-rouge">stack_index</code> is -4, because the first empty spot on the stack is four bytes below EBP:</p>

<p><img src="./Writing a C Compiler, Part 7_files/bad_stack_pointer.svg" alt="EBP and ESP point to the same location, the lowest address on the stack; the stack index points to the address just below it."></p>

<p>When we process the block in this example with <code class="language-plaintext highlighter-rouge">generate_block</code>, we’ll push <code class="language-plaintext highlighter-rouge">i</code> onto the stack:</p>

<pre><code class="language-asm">    movl $0, %eax
    push %eax
</code></pre>

<p>Now ESP is at EBP - 4, and <code class="language-plaintext highlighter-rouge">stack_index</code> is -8:</p>

<p><img src="./Writing a C Compiler, Part 7_files/bad_stack_pointer_2.svg" alt="ESP points at the address just below EBP on the call stack, which holds literal value 0. the stack index points just below that value. An entry in the variable map associates i with address EBP - 4"></p>

<p>After we exit the block, we forget that we allocated <code class="language-plaintext highlighter-rouge">i</code>. That means <code class="language-plaintext highlighter-rouge">i</code> is no longer in our variable map, and we’re still working with our original stack index of -4; remember that <code class="language-plaintext highlighter-rouge">generate_block</code> doesn’t return a stack index. We <em>should</em> forget <code class="language-plaintext highlighter-rouge">i</code>, because it’s out of scope.</p>

<p>The problem is, <code class="language-plaintext highlighter-rouge">i</code> is still there, because ESP is still pointing at it.</p>

<p><img src="./Writing a C Compiler, Part 7_files/bad_stack_pointer_3.svg" alt="ESP and stack index both point at EBP - 4, the address where i was allocated. The variable map is empty."></p>

<p>So when we push <code class="language-plaintext highlighter-rouge">j</code>, it will be just below <code class="language-plaintext highlighter-rouge">i</code>, at EBP - 8:</p>

<pre><code class="language-asm">  movl $1, %eax
  push %eax
</code></pre>

<p><img src="./Writing a C Compiler, Part 7_files/bad_stack_pointer_4.svg" alt="ESP and the stack index both point at EBP - 8, which contains literal value 1. However, the variable map associates j with EBP - 4. "></p>

<p>But because the stack index was -4, we’ll add a mapping from <code class="language-plaintext highlighter-rouge">j</code> to -4 in our variable map. Any future references to <code class="language-plaintext highlighter-rouge">j</code> (like in the return statement) will incorrectly use the stack location of <code class="language-plaintext highlighter-rouge">i</code> instead.</p>

<p>We <em>could</em> solve this by having <code class="language-plaintext highlighter-rouge">generate_block</code> return a stack index, but it’s probably better to just pop variables off the stack when we’re done with them, right at the end of <code class="language-plaintext highlighter-rouge">generate_block</code>. Conveniently, the size of <code class="language-plaintext highlighter-rouge">current_scope</code> tells us how many variables we need to pop.</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>def generate_block(block, var_map, stack_index)

  current_scope = Set()
  ...as before...

  bytes_to_deallocate = 4 * current_scope.size()
  emit "    addl ${}, %esp".format(bytes_to_deallocate)
</code></pre></div></div>

<h4 id="-task-1">☑ Task:</h4>
<p>Update the code-generation pass to correctly handle compound statements. It should succeed on all valid examples and fail on all invalid examples for stages 1-7.</p>

<h2 id="up-next">Up Next</h2>
<p>In the <a href="https://norasandler.com/2018/04/10/Write-a-Compiler-8.html">next post</a>, we’ll add <code class="language-plaintext highlighter-rouge">for</code>, <code class="language-plaintext highlighter-rouge">do</code>, and <code class="language-plaintext highlighter-rouge">while</code> loops. See you then!</p>

<p><em>If you have any questions, corrections, or other feedback, you can <a href="mailto:nora@norasandler.com">email me</a> or <a href="https://github.com/nlsandler/write_a_c_compiler/issues">open an issue</a>.</em></p>

<div class="footnote">
  <p><sup id="fn1">1</sup>
I’ll use comments to clarify the code snippets throughout this post, even though we haven’t added support for comments yet.
<a href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html#anchor1">↩</a></p>
</div>

<div class="footnote">
  <p><sup id="fn2">2</sup>
Global variables work a bit differently but we haven’t added those yet.
<a href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html#anchor2">↩</a></p>
</div>

  </div><a class="u-url" href="https://norasandler.com/2018/03/14/Write-a-Compiler-7.html" hidden=""></a>
</article>

      </div>
    </main>

    <footer class="site-footer">

  <div class="wrapper">
      <div class="footer-col-wrapper">
        <div class="footer-col footer-col-1">
            <div class="rc-scout" data-scout-rendered="true"><p class="rc-scout__text"><i class="rc-scout__logo"></i> Want to become a better programmer? <a class="rc-scout__link" href="https://www.recurse.com/scout/click?t=8f520efbc4be09fb83a71920f53a07b7">Join the Recurse Center!</a></p></div><script async="" defer="" src="./Writing a C Compiler, Part 7_files/loader.js"></script>
        </div>
      </div>
    <div class="footer-col-wrapper">
      <div class="footer-col footer-col-1">
        © 2023 Nora Sandler.
      </div>
    </div>
  </div>

</footer>


  


<script async="" src="./Writing a C Compiler, Part 7_files/scout-176be16681a03cbddd686f3cc96694d3aab338ea5cb65452f83d989309810528.js"></script><style class="rc-scout__style" type="text/css">.rc-scout {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
}
.rc-scout__text {
  display: block;
  padding: 0;
  border: 0;
  margin: 0;
  height: 100%;
  font-size: 100%;
}
.rc-scout__logo {
  display: inline-block;
  padding: 0;
  border: 0;
  margin: 0;
  width: 0.85em;
  height: 0.85em;
  background: no-repeat center url('data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%2012%2015%22%3E%3Crect%20x%3D%220%22%20y%3D%220%22%20width%3D%2212%22%20height%3D%2210%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%221%22%20width%3D%2210%22%20height%3D%228%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%222%22%20width%3D%228%22%20height%3D%226%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%223%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%225%22%20width%3D%222%22%20height%3D%221%22%20fill%3D%22%233dc06c%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%229%22%20width%3D%224%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%221%22%20y%3D%2211%22%20width%3D%2210%22%20height%3D%224%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%220%22%20y%3D%2212%22%20width%3D%2212%22%20height%3D%223%22%20fill%3D%22%23000%22%3E%3C%2Frect%3E%3Crect%20x%3D%222%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%223%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%224%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%225%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%226%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%227%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%228%22%20y%3D%2213%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3Crect%20x%3D%229%22%20y%3D%2212%22%20width%3D%221%22%20height%3D%221%22%20fill%3D%22%23fff%22%3E%3C%2Frect%3E%3C%2Fsvg%3E');
}
.rc-scout__link:link, .rc-scout__link:visited {
  color: #3dc06c;
  text-decoration: underline;
}
.rc-scout__link:hover, .rc-scout__link:active {
  color: #4e8b1d;
}
</style></body></html>