summaryrefslogtreecommitdiff
path: root/miniany/doc/zserge.com_blog_cucu-part3.html
blob: 89bb2bc7ad57dfa8a8e0d951be7e6c5cdc28ef17 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
<!DOCTYPE html>
<html lang="en-US">
	<head><script src="//archive.org/includes/analytics.js?v=cf34f82" type="text/javascript"></script>
<script type="text/javascript">window.addEventListener('DOMContentLoaded',function(){var v=archive_analytics.values;v.service='wb';v.server_name='wwwb-app221.us.archive.org';v.server_ms=291;archive_analytics.send_pageview({});});</script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/bundle-playback.js?v=t1Bf4PY_" charset="utf-8"></script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/wombat.js?v=txqj7nKC" charset="utf-8"></script>
<script>window.RufflePlayer=window.RufflePlayer||{};window.RufflePlayer.config={"autoplay":"on","unmuteOverlay":"hidden"};</script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/ruffle.js"></script>
<script type="text/javascript">
  __wm.init("http://web.archive.org/web");
  __wm.wombat("http://zserge.com:80/blog/cucu-part3.html","20160511234108","http://web.archive.org/","web","https://web-static.archive.org/_static/",
	      "1463010068");
</script>
<link rel="stylesheet" type="text/css" href="https://web-static.archive.org/_static/css/banner-styles.css?v=S1zqJCYt" />
<link rel="stylesheet" type="text/css" href="https://web-static.archive.org/_static/css/iconochive.css?v=qtvMKcIJ" />
<!-- End Wayback Rewrite JS Include -->

		<meta charset="UTF-8"/>
		<title>cucu: a compiler you can understand (3/3)</title>
		<meta content="Compilers is fun. Want to write your own one?" name="description"/>
		<meta content="Serge Zaitsev" name="author"/>
		<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
		<meta content="width=device-width" name="viewport"/>
		<link href="http://web.archive.org/web/20160511234108/http://zserge.com/rss.xml" rel="alternate" title="RSS" type="application/rss+xml"/>
		<link href="blog/cucu-part3.html" rel="canonical"/>		<!-- OpenGraph data -->
		<meta content="cucu: a compiler you can understand (3/3)" property="og:title"/>
		<meta content="article" property="og:type"/>
		<meta content="blog/cucu-part3.html" property="og:url"/>
		<meta content="http://web.archive.org/web/20160511234108im_/http://zserge.com/logo.png" property="og:image"/>
		<meta content="Compilers is fun. Want to write your own one?" property="og:description"/>
		<meta content="en_US" property="og:locale"/>		<!-- Twitter card data -->
		<meta content="summary" name="twitter:card"/>
		<meta content="@zsergo" name="twitter:site"/>		<!-- Fonts -->
		<link href="//web.archive.org/web/20160511234108cs_/http://fonts.googleapis.com/css?family=Merriweather:900,900italic,300,300italic" rel="stylesheet" type="text/css"/>
		<link href="//web.archive.org/web/20160511234108cs_/http://fonts.googleapis.com/css?family=Roboto+Mono:100,200,300,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet" type="text/css"/>		<!-- Styles -->
		<link href="/web/20160511234108cs_/http://zserge.com/styles.css" rel="stylesheet" type="text/css"/>		<!-- Favicons -->
		<link href="/web/20160511234108im_/http://zserge.com/favicon.ico" rel="shortcut icon"/>
		<link href="/web/20160511234108im_/http://zserge.com/favicon.128.png" rel="apple-touch-icon-precomposed"/>
	</head>
	<body><!-- BEGIN WAYBACK TOOLBAR INSERT -->
<script>__wm.rw(0);</script>
<div id="wm-ipp-base" lang="en" style="display:none;direction:ltr;">
<div id="wm-ipp" style="position:fixed;left:0;top:0;right:0;">
<div id="donato" style="position:relative;width:100%;">
  <div id="donato-base">
    <iframe id="donato-if" src="https://archive.org/includes/donate.php?as_page=1&amp;platform=wb&amp;referer=http%3A//web.archive.org/web/20160511234108/http%3A//zserge.com/blog/cucu-part3.html"
	    scrolling="no" frameborder="0" style="width:100%; height:100%">
    </iframe>
  </div>
</div><div id="wm-ipp-inside">
  <div id="wm-toolbar" style="position:relative;display:flex;flex-flow:row nowrap;justify-content:space-between;">
    <div id="wm-logo" style="/*width:110px;*/padding-top:12px;">
      <a href="/web/" title="Wayback Machine home page"><img src="https://web-static.archive.org/_static/images/toolbar/wayback-toolbar-logo-200.png" srcset="https://web-static.archive.org/_static/images/toolbar/wayback-toolbar-logo-100.png, https://web-static.archive.org/_static/images/toolbar/wayback-toolbar-logo-150.png 1.5x, https://web-static.archive.org/_static/images/toolbar/wayback-toolbar-logo-200.png 2x" alt="Wayback Machine" style="width:100px" border="0" /></a>
    </div>
    <div class="c" style="display:flex;flex-flow:column nowrap;justify-content:space-between;flex:1;">
      <form class="u" style="display:flex;flex-direction:row;flex-wrap:nowrap;" target="_top" method="get" action="/web/submit" name="wmtb" id="wmtb"><input type="text" name="url" id="wmtbURL" value="http://zserge.com/blog/cucu-part3.html" onfocus="this.focus();this.select();" style="flex:1;"/><input type="hidden" name="type" value="replay" /><input type="hidden" name="date" value="20160511234108" /><input type="submit" value="Go" />
      </form>
      <div style="display:flex;flex-flow:row nowrap;align-items:flex-end;">
                <div class="s" id="wm-nav-captures" style="flex:1;">
                    <a class="t" href="/web/20160511234108*/http://zserge.com/blog/cucu-part3.html" title="See a list of every capture for this URL">10 captures</a>
          <div class="r" title="Timespan for captures of this URL">28 Oct 2012 - 09 Feb 2021</div>
          </div>
        <div class="k">
          <a href="" id="wm-graph-anchor">
            <div id="wm-ipp-sparkline" title="Explore captures for this URL" style="position: relative">
              <canvas id="wm-sparkline-canvas" width="725" height="27" border="0"></canvas>
            </div>
          </a>
        </div>
      </div>
    </div>
    <div class="n">
      <table>
        <tbody>
          <!-- NEXT/PREV MONTH NAV AND MONTH INDICATOR -->
          <tr class="m">
            <td class="b" nowrap="nowrap"><a href="http://web.archive.org/web/20160410041115/http://zserge.com:80/blog/cucu-part3.html" title="10 Apr 2016"><strong>Apr</strong></a></td>
            <td class="c" id="displayMonthEl" title="You are here: 23:41:08 May 11, 2016">MAY</td>
            <td class="f" nowrap="nowrap"><a href="http://web.archive.org/web/20210209112718/http://zserge.com/blog/cucu-part3.html" title="09 Feb 2021"><strong>Feb</strong></a></td>
          </tr>
          <!-- NEXT/PREV CAPTURE NAV AND DAY OF MONTH INDICATOR -->
          <tr class="d">
            <td class="b" nowrap="nowrap"><a href="http://web.archive.org/web/20160410041115/http://zserge.com:80/blog/cucu-part3.html" title="04:11:15 Apr 10, 2016"><img src="https://web-static.archive.org/_static/images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /></a></td>
            <td class="c" id="displayDayEl" style="width:34px;font-size:22px;white-space:nowrap;" title="You are here: 23:41:08 May 11, 2016">11</td>
            <td class="f" nowrap="nowrap"><a href="http://web.archive.org/web/20210209112718/http://zserge.com/blog/cucu-part3.html" title="11:27:18 Feb 09, 2021"><img src="https://web-static.archive.org/_static/images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0" /></a></td>
          </tr>
          <!-- NEXT/PREV YEAR NAV AND YEAR INDICATOR -->
          <tr class="y">
            <td class="b" nowrap="nowrap"><a href="http://web.archive.org/web/20150404213154/http://zserge.com:80/blog/cucu-part3.html" title="04 Apr 2015"><strong>2015</strong></a></td>
            <td class="c" id="displayYearEl" title="You are here: 23:41:08 May 11, 2016">2016</td>
            <td class="f" nowrap="nowrap"><a href="http://web.archive.org/web/20210209112718/http://zserge.com/blog/cucu-part3.html" title="09 Feb 2021"><strong>2021</strong></a></td>
          </tr>
        </tbody>
      </table>
    </div>
    <div class="r" style="display:flex;flex-flow:column nowrap;align-items:flex-end;justify-content:space-between;">
      <div id="wm-btns" style="text-align:right;height:23px;">
                <span class="xxs">
          <div id="wm-save-snapshot-success">success</div>
          <div id="wm-save-snapshot-fail">fail</div>
          <a id="wm-save-snapshot-open" href="#" title="Share via My Web Archive" >
            <span class="iconochive-web"></span>
          </a>
          <a href="https://archive.org/account/login.php" title="Sign In" id="wm-sign-in">
            <span class="iconochive-person"></span>
          </a>
          <span id="wm-save-snapshot-in-progress" class="iconochive-web"></span>
        </span>
                <a class="xxs" href="http://faq.web.archive.org/" title="Get some help using the Wayback Machine" style="top:-6px;"><span class="iconochive-question" style="color:rgb(87,186,244);font-size:160%;"></span></a>
        <a id="wm-tb-close" href="#close" style="top:-2px;" title="Close the toolbar"><span class="iconochive-remove-circle" style="color:#888888;font-size:240%;"></span></a>
      </div>
      <div id="wm-share" class="xxs">
        <a href="/web/20160511234108/http://web.archive.org/screenshot/http://zserge.com/blog/cucu-part3.html"
           id="wm-screenshot"
           title="screenshot">
          <span class="wm-icon-screen-shot"></span>
        </a>
        <a href="#" id="wm-video" title="video">
          <span class="iconochive-movies"></span>
        </a>
        <a id="wm-share-facebook" href="#" data-url="http://web.archive.org/web/20160511234108/http://zserge.com:80/blog/cucu-part3.html" title="Share on Facebook" style="margin-right:5px;" target="_blank"><span class="iconochive-facebook" style="color:#3b5998;font-size:160%;"></span></a>
        <a id="wm-share-twitter" href="#" data-url="http://web.archive.org/web/20160511234108/http://zserge.com:80/blog/cucu-part3.html" title="Share on Twitter" style="margin-right:5px;" target="_blank"><span class="iconochive-twitter" style="color:#1dcaff;font-size:160%;"></span></a>
      </div>
      <div style="padding-right:2px;text-align:right;white-space:nowrap;">
        <a id="wm-expand" class="wm-btn wm-closed" href="#expand" onclick="__wm.ex(event);return false;"><span id="wm-expand-icon" class="iconochive-down-solid"></span> <span class="xxs" style="font-size:80%;">About this capture</span></a>
      </div>
    </div>
  </div>
    <div id="wm-capinfo" style="border-top:1px solid #777;display:none; overflow: hidden">
        <div id="wm-capinfo-notice" source="api"></div>
                <div id="wm-capinfo-collected-by">
    <div style="background-color:#666;color:#fff;font-weight:bold;text-align:center">COLLECTED BY</div>
    <div style="padding:3px;position:relative" id="wm-collected-by-content">
            <div style="display:inline-block;vertical-align:top;width:50%;">
			<span class="c-logo" style="background-image:url(https://archive.org/services/img/alexacrawls);"></span>
		Organization: <a style="color:#33f;" href="https://archive.org/details/alexacrawls" target="_new"><span class="wm-title">Alexa Crawls</span></a>
		<div style="max-height:75px;overflow:hidden;position:relative;">
	  <div style="position:absolute;top:0;left:0;width:100%;height:75px;background:linear-gradient(to bottom,rgba(255,255,255,0) 0%,rgba(255,255,255,0) 90%,rgba(255,255,255,255) 100%);"></div>
	  Starting in 1996, <a href="http://www.alexa.com/">Alexa Internet</a> has been donating their crawl data to the Internet Archive.  Flowing in every day, these data are added to the <a href="http://web.archive.org/">Wayback Machine</a> after an embargo period.
	</div>
	      </div>
      <div style="display:inline-block;vertical-align:top;width:49%;">
			<span class="c-logo" style="background-image:url(https://archive.org/services/img/alexacrawls)"></span>
		<div>Collection: <a style="color:#33f;" href="https://archive.org/details/alexacrawls" target="_new"><span class="wm-title">Alexa Crawls</span></a></div>
		<div style="max-height:75px;overflow:hidden;position:relative;">
	  <div style="position:absolute;top:0;left:0;width:100%;height:75px;background:linear-gradient(to bottom,rgba(255,255,255,0) 0%,rgba(255,255,255,0) 90%,rgba(255,255,255,255) 100%);"></div>
	  Starting in 1996, <a href="http://www.alexa.com/">Alexa Internet</a> has been donating their crawl data to the Internet Archive.  Flowing in every day, these data are added to the <a href="http://web.archive.org/">Wayback Machine</a> after an embargo period.
	</div>
	      </div>
    </div>
    </div>
    <div id="wm-capinfo-timestamps">
    <div style="background-color:#666;color:#fff;font-weight:bold;text-align:center" title="Timestamps for the elements of this page">TIMESTAMPS</div>
    <div>
      <div id="wm-capresources" style="margin:0 5px 5px 5px;max-height:250px;overflow-y:scroll !important"></div>
      <div id="wm-capresources-loading" style="text-align:left;margin:0 20px 5px 5px;display:none"><img src="https://web-static.archive.org/_static/images/loading.gif" alt="loading" /></div>
    </div>
    </div>
  </div></div></div></div><div id="wm-ipp-print">The Wayback Machine - http://web.archive.org/web/20160511234108/http://zserge.com:80/blog/cucu-part3.html</div>
<script type="text/javascript">//<![CDATA[
__wm.bt(725,27,25,2,"web","http://zserge.com/blog/cucu-part3.html","20160511234108",1996,"https://web-static.archive.org/_static/",["https://web-static.archive.org/_static/css/banner-styles.css?v=S1zqJCYt","https://web-static.archive.org/_static/css/iconochive.css?v=qtvMKcIJ"], false);
  __wm.rw(1);
//]]></script>
<!-- END WAYBACK TOOLBAR INSERT -->
 
		<header>
			<nav>
				<a class="logo" href="/web/20160511234108/http://zserge.com/">Z</a>
			</nav>
			<div class="empty"></div>
			<nav>
				<section>
					<a href="/web/20160511234108/http://zserge.com/about.html">about</a>
					<a href="/web/20160511234108/http://zserge.com/blog.html">posts</a>
				</section>
				<section>
					<a href="http://web.archive.org/web/20160511234108/https://twitter.com/zsergo">@me</a>
					<a href="http://web.archive.org/web/20160511234108/https://plus.google.com/u/0/+SergeZaitsev">+me</a>
					<a href="http://web.archive.org/web/20160511234108/https://github.com/zserge">&lt;/&gt;me</a>
				</section>
			</nav>
		</header>
		<p>Let&rsquo;s talk about compiler backends. C should be a portable language, and there
is no need to rewrite the whole compiler if you want to port it to the new CPU
architecture.</p>

<p>Backend is a part of the compiler that generates low-level byte code. Compiler
itself just calls backend functions. Good backend design makes the compiler
highly portable.</p>

<p>I wanted CUCU to be a portable compiler (actually, a cross-compiler).
So, I decided to move backend code generator to a separate module.</p>

<p>But before we dive into the backend code generation, let&rsquo;s think of how we will
test it.</p>

<h2>minimal target architecture</h2>

<p>Our minimal target has two registers (let&rsquo;s call them A and B), and a stack.
Register A is an accumulator. We could use fixed-size instruction codes, as
many RISC CPUs do, but there&rsquo;s not much fun in decoding hexadecimal numbers to
find out what the code actually does.</p>

<p>I have chosen a simpler way. Every instruction is 8 bytes long (yes, it&rsquo;s huge,
but who cares - it&rsquo;s a test imaginary architecture). And the first 7 bytes of
the instruction are just ASCII symbols, and the last one is 0x10 (&rsquo;\n&rsquo;).</p>

<p>This allows us to use human-readable instruction codes, like <code>A:=A+B</code>,
<code>A:=1ef8</code>, or <code>push A</code>. These seem to be self-explanatory (&ldquo;add register B
to register A&rdquo;, &ldquo;put 0x1ef8 to register A&rdquo; and &ldquo;push the value of register A
to the stack&rdquo;).</p>

<ul>
<li><code>A:=NNNN</code> - put value of 0xNNNN to register A</li>
<li><code>A:=m[A]</code> - put value at address stored in register A to register A (as byte)</li>
<li><code>A:=M[A]</code> - put value at address stored in register A to register A (as int)</li>
<li><code>m[B]:=A</code> - store the value of A to address stored in B (as byte)</li>
<li><code>M[B]:=A</code> - store the value of A to address stored in B (as int)</li>
<li><code>push A</code> - push the value of A on the stack</li>
<li><code>pop B</code> - pop the value from the stack to B</li>
<li><code>A:=B+A</code> - add A and B</li>
<li><code>A:=B-A</code> - subtract A and B</li>
<li><code>A:=B&amp;A</code> - bitwise AND operation</li>
<li><code>A:=B|A</code> - bitwise OR operation</li>
<li><code>A:=B!=A</code> - A is 1 if B!=A, and 0 otherwise</li>
<li><code>A:=B==A</code> - A is 1 if B==A, and 0 otherwise</li>
<li><code>A:=B&lt;&lt;A</code> - shift left the value of B to A bits</li>
<li><code>A:=B&gt;&gt;A</code> - shift right the value of B to A bits</li>
<li><code>A:=B&lt;A</code> - A is 1 if B&lt;A, and 0 otherwise</li>
<li><code>popNNNN</code> - drop NNNN items from the stack</li>
<li><code>sp@NNNN</code> - put the value at address NNNN on the stack to the register A</li>
<li><code>jmpNNNN</code> - jump to address NNNN</li>
<li><code>jpzNNNN</code> - jump to address NNNN if A is zero</li>
<li><code>call A</code> - call function at address stored in A</li>
<li><code>ret</code>    - return from the function</li>
</ul>

<h2>cucu backend design</h2>

<p>We include &ldquo;gen.c&rdquo; file, which is actually a backend implementation.
Let&rsquo;s start with simple functions: <code>gen_start()</code> and <code>gen_finish()</code>.
They are used to generate application prologue (like PE header, or ELF header)
and to post-process byte code.</p>

<p>Compiler provides the function <code>emit()</code>, that emits byte code to the <code>code[]</code>
array. At the very end, <code>code[]</code> contains a ready-to-use compiled program.</p>

<p>So, compiler calls backend function, and backend just calls emit() with the
specific byte-codes, and this is how we get compiled machine code.</p>

<p>Now we need to define what are the most common constructions, that backend
should implement. Let&rsquo;s start with this simple program:</p>

<pre><code>int main() {
    return 0;
}
</code></pre>

<p>Let&rsquo;s talk about calling convention. This is how arguments are passed to the
function and how the return value is fetched. We stated before, that arguments
are placed on the stack (the 1st argument is pushed 1st). Let&rsquo;s make a deal,
that the value of the register A when function returns is its return value.</p>

<p>Actually, we will store all values to register A. Register B will be used as
a temporary register.</p>

<p>For the program above I would expect the byte code to be something like:</p>

<pre><code>A:=0000
ret
</code></pre>

<p>So, we need a function to put immediate numeric value to the register A, and a
function to do the return. We will call them <code>gen_const(int)</code> and <code>gen_ret()</code>.</p>

<p><code>gen_const</code> will be called every time the compiler finds a primary expression
which is a number, and <code>gen_ret</code> is called when the compiler finds a return
statement. Though, some functions can be <code>void</code>, and thus have no explicit
<code>return</code> statement. So, for safety and simplicity we will add an extra
<code>get_ret()</code> at the end of every function, even if there has been an explicit
return before.</p>

<p><em>Our compiler never claimed to be optimal or fast or safe, so double-return is
fine for us</em></p>

<h2>maths</h2>

<p>Now let&rsquo;s compile some arithmetic expressions. They are all similar, so I&rsquo;ll
show how it&rsquo;s done with an example of addition. Remember how parser works? It
parses (and theoretically, compiles) the left part of the expression, then the
right part, and then performs an operation.</p>

<p>This is how a typical math expression is compiled (remember a joke about an
elephant, a giraffe and a fridge?):</p>

<pre><code>..evaluate left part
push A
..evaluate right path
pop B
A:=A+B
</code></pre>

<p>After we compiler the left part we need to store the results (register A)
somewhere. Stack is the most simple way to do it. So, an expression
<code>1+2+3</code> will be compiled to:</p>

<pre><code>A:=0001  -+     -+
push A    |      |
A:=0002   | 1+2  |
pop B     |      |
A:=A+B   -+      | +3
push A           |
A:=0003          |
pop B            |
A:=A+B       ----+
</code></pre>

<h2>other stuff</h2>

<p>Work with symbols is easy, too.</p>

<p>To call a function we put its address to register A,
then to a <code>gen_call()</code> which emits <code>call A</code>.</p>

<p>Accessing local symbols is done using <code>gen_stack_addr</code> which
return the address of the given item on the stack.</p>

<p>Accessing global symbols is done using <code>gen_sym_addr()</code>.
Also, when a new symbols is created the compiler might need to
generate some code (e.g. when generation assembler code).
<code>gen_sym</code> is used for such cases.</p>

<p><code>gen_pop</code> drops N items from the stack (increases stack pointer).</p>

<p><code>gen_unref</code> is used to unreference pointers. Depending on the type (byte or int)
it generates <code>A:=m[A]</code> or <code>A:=M[A]</code> code.</p>

<p><code>gen_array</code> puts the constant array on the stack. Or maybe if there is a
separate segment for data it should store the array there.</p>

<p>Finally, <code>gen_patch()</code> is used to patch jump address when compiling if/while
statement. The reason is that when we meet such statement we must generate a
jump instruction, but the address is not known yet - it depends on how many code
is compiled for the body statetment. So, after the body is compiled
we patch jump address with the correct value.</p>

<p>We are done. Let&rsquo;s try the following program:</p>

<pre><code>int main() {
    int k;
    k = 3;
    if (k == 0) {
        return 1;
    }
    return 0;
}

jmp0008 # by gen_start(): jump to main(), which is the next instruction at 0x8
push A  # add space for local variable &quot;k&quot;
sp@0000 # get the address of the local variable #0 (&quot;k&quot;)
push A  # store it
A:=0003 # put 3 to A
pop B   # get the stored earlier address of &quot;k&quot;
M[B]:=A # put the value of A to &quot;k&quot; as int
sp@0000 # get the address of &quot;k&quot;
A:=M[A] # get its value as int
push A  # store it
A:=0000 # put 0 to A
pop B   # get the value of &quot;k&quot; stored earlier
A:=B==A # compare A and B (&quot;k&quot; and zero)
jmz0090 # if false (A!=B, k!=0) - jump to 0x90
A:=0001 # store 1 to A as return value
pop0001 # free space allocated for &quot;k&quot; on the stack
ret     # and return
jmp0090 # &quot;else&quot; branch should be here, instead jump to 0x90 (next instruction)
A:=0000 # store 0 to A as return value
pop0001 # free space allocated for &quot;k&quot; on the stack
ret     # and return
ret     # remember about double-return for safety? ;)
</code></pre>

<p>Yeah, the code is so dirty and bloated. But it works. And which is more
important, you know now how compilers work and how create your own one.</p>

<p>But I should warn you&hellip;</p>

<h2>warning</h2>

<p>Never, please, never do it this way! If you want to write your own compiler -
use real grown-up tools:</p>

<ul>
<li>flex/lex/jlex/&hellip;</li>
<li>yacc/bison/cup&hellip;</li>
<li>ANTLR</li>
<li>Ragel</li>
<li>and many others</li>
</ul>

<p>Also, it&rsquo;s worth checking real literature, like <a href="http://web.archive.org/web/20160511234108/http://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools">Dragon Book</a>. Maybe the
courses from <a href="http://web.archive.org/web/20160511234108/https://www.coursera.org/course/compilers">coursera.org</a> will be useful for you, too.</p>

<p>If you need to port existing languages for your own architecture -
you&rsquo;d better learn how to write LLVM backends or GCC backends.</p>

<p>If you want to read more about toy compilers - check <a href="http://web.archive.org/web/20160511234108/http://en.wikipedia.org/wiki/Small-C">SmallC</a>.</p>

<p>If you want to write compiler for a simple language - check PL/0 or Basic or C.</p>

<p>But please, never write compilers manually for real tasks.</p>

<h2>final word</h2>

<p>The full sources of the compiler are available on <a href="http://web.archive.org/web/20160511234108/https://bitbucket.org/zserge/cucu">my bitbucket page</a>.  They
are licensed under MIT, feel free to use and modify.  I&rsquo;m not sure if there is
any sense in submitting issues to this project, so do it only if you know how
to fix them :)</p>

<p>Anyway, compilers is fun. I hope you liked it!</p>

<p><em>Check <a href="/web/20160511234108/http://zserge.com/blog/cucu-part1.html">part 1</a> or <a href="/web/20160511234108/http://zserge.com/blog/cucu-part2.html">part 2</a> if
you missed something</em></p>

		<p>Posted on 2012-10-25</p>
		<section class="social">
			<section>
				<a href="http://web.archive.org/web/20160511234108/http://www.facebook.com/share.php?u=http://zserge.com/blog/cucu-part3.html&amp;title=cucu:%20a%20compiler%20you%20can%20understand%20%283/3%29" style="background-color: #3b5997">like</a>
				&nbsp;
				<a href="http://web.archive.org/web/20160511234108/http://twitter.com/intent/tweet?status=cucu:%20a%20compiler%20you%20can%20understand%20%283/3%29+http://zserge.com/blog/cucu-part3.html%20via%20@zsergo" style="background-color: #41b7d8">tweet</a>
				&nbsp;
				<a href="http://web.archive.org/web/20160511234108/https://plus.google.com/share?url=http://zserge.com/blog/cucu-part3.html" style="background-color: #d64937">+1</a>
			</section>
			<section>
				<a href="/web/20160511234108/http://zserge.com/rss.xml" style="background-color: #f26522">rss</a>
				&nbsp;
				<a href="http://web.archive.org/web/20160511234108/https://twitter.com/zsergo" style="background-color: #41b7d8">@me</a>
				&nbsp;
				<a href="http://web.archive.org/web/20160511234108/https://plus.google.com/u/0/+SergeZaitsev" style="background-color: #d64937">+me</a>
				&nbsp;
				<a href="http://web.archive.org/web/20160511234108/https://github.com/zserge" style="background-color: #333333">&lt;/&gt;me</a>
			</section>
		</section>
		<section><div id="disqus_thread"></div><script type="text/javascript">
var disqus_shortname='zserge';
	(function() {
		var dsq = document.createElement('script');
		dsq.type = 'text/javascript';
		dsq.async = true;
		dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
		(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
		})();
	</script>
<noscript>
</noscript>

</section>
		<footer>
			<p>
				&copy;2012&ndash;2015 &middot;
				<a href="http://web.archive.org/web/20160511234108/http://zserge.com/">Serge Zaitsev</a>
				&middot; 
				<a href="http://web.archive.org/web/20160511234108/mailto:zaitsev.serge@gmail.com">zaitsev.serge@gmail.com</a>
			</p>
		</footer>
		<script>
	(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
		(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
	m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
	})(window,document,'script','//web.archive.org/web/20160511234108/http://www.google-analytics.com/analytics.js','ga');
	ga('create', 'UA-33644825-1', 'zserge.com');
	ga('send', 'pageview');
</script>


	</body>
</html>
<!--
     FILE ARCHIVED ON 23:41:08 May 11, 2016 AND RETRIEVED FROM THE
     INTERNET ARCHIVE ON 17:20:59 Jan 12, 2024.
     JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.

     ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
     SECTION 108(a)(3)).
-->
<!--
playback timings (ms):
  captures_list: 195.079 (11)
  exclusion.robots: 0.211
  exclusion.robots.policy: 0.2
  cdx.remote: 0.08
  esindex: 0.012
  LoadShardBlock: 150.869 (3)
  PetaboxLoader3.datanode: 135.173 (4)
  load_resource: 86.265
  PetaboxLoader3.resolve: 65.917
-->