commit c57ba5281c2a026ea671a56411ef8c0956f5836d
parent 1a41782c6970d2f794df42b7805dccd7ed8a6bbd
Author: erai <erai@omiltem.net>
Date: Sat, 16 Mar 2024 11:39:46 -0400
Refactor into separate parse, lex, compile stages
Diffstat:
M | .gitignore | | | 3 | ++- |
A | alloc.c | | | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | as.c | | | 960 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | buildlex.sh | | | 10 | ---------- |
M | cc0.c | | | 28 | +++++++++++++++++++--------- |
M | cc1.c | | | 3774 | ++++++------------------------------------------------------------------------- |
A | cc3.y | | | 152 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | genlex.c | | | 1115 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | lex.c | | | 1289 | ------------------------------------------------------------------------------- |
A | lex1.c | | | 408 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | lib.c | | | 117 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | parse1.c | | | 1482 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | syscall.c | | | 34 | ++++++++++++++++++++++++++++++++++ |
A | type.c | | | 186 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
14 files changed, 4805 insertions(+), 4815 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
cc0
cc1
cc2
-lex
+genlex
+gencc
diff --git a/alloc.c b/alloc.c
@@ -0,0 +1,62 @@
+struct page {
+ ptr: *byte;
+ fill: int;
+ size: int;
+}
+
+struct alloc {
+ page: *page;
+}
+
+setup_alloc(c: *alloc) {
+ c.page = 0: *page;
+}
+
+alloc(c: *alloc, size: int): *byte {
+ var page: *page;
+ var mret: int;
+ var ret: *byte;
+ var psize: int;
+
+ if (size < 0) {
+ die("invalid alloc");
+ }
+
+ if (size >= 2048) {
+ size = size + 4095;
+ size = size & ~4095;
+ mret = mmap(0, size, 3, 0x22, -1, 0);
+ if (mret == -1) {
+ die("out of memory");
+ }
+ ret = mret: *byte;
+ return ret;
+ }
+
+ page = c.page;
+ if (page) {
+ if (size <= page.size - page.fill) {
+ mret = page.ptr:int + page.fill;
+ page.fill = page.fill + size;
+ ret = mret: *byte;
+ return ret;
+ }
+ }
+
+ psize = 64 * 1024;
+
+ mret = mmap(0, psize, 3, 0x22, -1, 0);
+ if (mret == -1) {
+ die("out of memory");
+ }
+
+ page = mret: *page;
+ page.ptr = (&page[1]): *byte;
+ ret = page.ptr;
+ page.size = psize - sizeof(*page);
+ page.fill = size;
+
+ c.page = page;
+
+ return ret;
+}
diff --git a/as.c b/as.c
@@ -0,0 +1,960 @@
+struct fixup {
+ next: *fixup;
+ ptr: *byte;
+ at: int;
+}
+
+struct label {
+ fix: *fixup;
+ at: int;
+ fixed: int;
+}
+
+struct chunk {
+ next: *chunk;
+ buf: *byte;
+ fill: int;
+ cap: int;
+}
+
+struct assembler {
+ a: *alloc;
+ fdout: int;
+ at: int;
+ text: *chunk;
+ text_end: *chunk;
+}
+
+setup_assembler(a: *alloc): *assembler {
+ var c: *assembler;
+ c = alloc(a, sizeof(*c)): *assembler;
+ c.a = a;
+ c.fdout = 1;
+ c.at = 0;
+ c.text = 0:*chunk;
+ c.text_end = 0:*chunk;
+ return c;
+}
+
+putchar(c: *assembler, ch: int) {
+ fdputc(c.fdout, ch);
+}
+
+open_output(c: *assembler, filename: *byte) {
+ var fd: int;
+
+ if (c.fdout != 1) {
+ die("multiple output files");
+ }
+
+ unlink(filename);
+
+ fd = open(filename, 64 + 1, (7 << 6) + (7 << 3) +7);
+ if (fd < 0) {
+ die("failed to open output");
+ }
+
+ c.fdout = fd;
+}
+
+// Create a new label
+mklabel(c: *assembler): *label {
+ var l: *label;
+
+ l = alloc(c.a, sizeof(*l)):*label;
+
+ l.fix = 0:*fixup;
+ l.at = 0;
+ l.fixed = 0;
+
+ return l;
+}
+
+// Reserve size in the output buffer
+reserve(c: *assembler, n: int) {
+ var m: *byte;
+ var b: *chunk;
+
+ if (c.text_end && c.text_end.cap - c.text_end.fill >= n) {
+ return;
+ }
+
+ if (n < 4096) {
+ n = 4096;
+ }
+
+ m = alloc(c.a, n);
+ b = alloc(c.a, sizeof(*b)):*chunk;
+
+ b.buf = m;
+ b.fill = 0;
+ b.cap = n;
+ b.next = 0:*chunk;
+
+ if (c.text_end) {
+ c.text_end.next = b;
+ c.text_end = b;
+ } else {
+ c.text = b;
+ c.text_end = b;
+ }
+}
+
+// Add a single byte to the output
+emit(c: *assembler, x: int) {
+ reserve(c, 1);
+ c.text_end.buf[c.text_end.fill] = x:byte;
+ c.text_end.fill = c.text_end.fill + 1;
+ c.at = c.at + 1;
+}
+
+// Fix a single reference
+fixup(c: *assembler, here: *byte, delta: int) {
+ here[0] = delta: byte;
+ here[1] = (delta >> 8): byte;
+ here[2] = (delta >> 16): byte;
+ here[3] = (delta >> 24): byte;
+}
+
+// Add an new fixup for the current position
+addfixup(c: *assembler, l: *label) {
+ var f: *fixup;
+ var here: *byte;
+
+ reserve(c, 4);
+
+ here = &c.text_end.buf[c.text_end.fill];
+
+ emit(c, 0);
+ emit(c, 0);
+ emit(c, 0);
+ emit(c, 0);
+
+ if (l.fixed) {
+ fixup(c, here, l.at - c.at);
+ } else {
+ f = alloc(c.a, sizeof(*f)): *fixup;
+
+ f.next = l.fix;
+ f.ptr = here;
+ f.at = c.at;
+
+ l.fix = f;
+ }
+}
+
+// Fix references to a label to the current position
+fixup_label(c: *assembler, l: *label) {
+ var f: *fixup;
+
+ if (l.fixed) {
+ die("already fixed");
+ }
+
+ l.at = c.at;
+ l.fixed = 1;
+
+ f = l.fix;
+ loop {
+ if (!f) {
+ break;
+ }
+ fixup(c, f.ptr, l.at - f.at);
+ f = f.next;
+ }
+}
+
+emit_ptr(c: *assembler, l: *label) {
+ // lea %rax, [l]
+ emit(c, 0x48);
+ emit(c, 0x8d);
+ emit(c, 0x05);
+ addfixup(c, l);
+ // push %rax
+ emit(c, 0x50);
+}
+
+emit_jmp(c: *assembler, l: *label) {
+ // jmp l
+ emit(c, 0xe9);
+ addfixup(c, l);
+}
+
+emit_num(c: *assembler, x: int) {
+ // push x
+ emit(c, 0x68);
+ emit(c, x);
+ emit(c, x >> 8);
+ emit(c, x >> 16);
+ emit(c, x >> 24);
+}
+
+emit_str(c: *assembler, s: *byte) {
+ var a: *label;
+ var b: *label;
+ var i: int;
+
+ a = mklabel(c);
+ b = mklabel(c);
+
+ // jmp b
+ emit_jmp(c, b);
+
+ // a:
+ fixup_label(c, a);
+
+ i = 0;
+ // .string s
+ loop {
+ if (!s[i]) {
+ break;
+ }
+ emit(c, s[i]:int);
+ i = i + 1;
+ }
+ emit(c, 0);
+
+ // nop sled
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+ emit(c, 0x90);
+
+ // b:
+ fixup_label(c, b);
+ // push a
+ emit_ptr(c, a);
+}
+
+emit_pop(c: *assembler, n: int) {
+ n = n * 8;
+ // add rsp, 8*n
+ emit(c, 0x48);
+ emit(c, 0x81);
+ emit(c, 0xc4);
+ emit(c, n);
+ emit(c, n >> 8);
+ emit(c, n >> 16);
+ emit(c, n >> 24);
+}
+
+emit_preamble(c: *assembler, n: int, start: int) {
+ var i: int;
+ if (start) {
+ // xor rbp, rbp
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xed);
+ // mov rdi, [rsp]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x3c);
+ emit(c, 0x24);
+ // lea rsi, [rsp + 8]
+ emit(c, 0x48);
+ emit(c, 0x8d);
+ emit(c, 0x74);
+ emit(c, 0x24);
+ emit(c, 0x08);
+ // lea rdx, [rsi + rdi * 8 + 8]
+ emit(c, 0x48);
+ emit(c, 0x8d);
+ emit(c, 0x54);
+ emit(c, 0xfe);
+ emit(c, 0x08);
+ // push rdx
+ emit(c, 0x52);
+ // push rsi
+ emit(c, 0x56);
+ // push rdi
+ emit(c, 0x57);
+ // push rbp
+ emit(c, 0x55);
+ }
+ // push rbp
+ emit(c, 0x55);
+ // mov rbp, rsp
+ emit(c, 0x48);
+ emit(c, 0x89);
+ emit(c, 0xe5);
+ i = 0;
+ loop {
+ if (i >= n) {
+ break;
+ }
+ emit_num(c, 0);
+ i = i + 8;
+ }
+}
+
+emit_store(c: *assembler, t: *type) {
+ // pop rdi
+ emit(c, 0x5f);
+ // pop rax
+ emit(c, 0x58);
+ if (t.kind == TY_BYTE) {
+ // mov [rdi], al
+ emit(c, 0x88);
+ emit(c, 0x07);
+ } else if (type_isprim(t)) {
+ // mov [rdi], rax
+ emit(c, 0x48);
+ emit(c, 0x89);
+ emit(c, 0x07);
+ } else {
+ die("invalid store");
+ }
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_load(c: *assembler, t: *type) {
+ // pop rdi
+ emit(c, 0x5f);
+ if (t.kind == TY_BYTE) {
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // mov al, [rdi]
+ emit(c, 0x8a);
+ emit(c, 0x07);
+ } else if (type_isprim(t)) {
+ // mov rax, [rdi]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x07);
+ } else {
+ die("invalid load");
+ }
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_jz(c: *assembler, l: *label) {
+ // pop rax
+ emit(c, 0x58);
+ // test rax, rax
+ emit(c, 0x48);
+ emit(c, 0x85);
+ emit(c, 0xc0);
+ // jz no
+ emit(c, 0x0f);
+ emit(c, 0x84);
+ addfixup(c, l);
+}
+
+emit_lea(c: *assembler, offset: int) {
+ // lea rax, [rbp + offset]
+ emit(c, 0x48);
+ emit(c, 0x8d);
+ emit(c, 0x85);
+ emit(c, offset);
+ emit(c, offset >> 8);
+ emit(c, offset >> 16);
+ emit(c, offset >> 24);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_and(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rdx
+ emit(c, 0x5a);
+ // and rdx, rax
+ emit(c, 0x48);
+ emit(c, 0x21);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_or(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rdx
+ emit(c, 0x5a);
+ // or rdx, rax
+ emit(c, 0x48);
+ emit(c, 0x09);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_xor(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rdx
+ emit(c, 0x5a);
+ // xor rdx, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_add(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rdx
+ emit(c, 0x5a);
+ // add rdx, rax
+ emit(c, 0x48);
+ emit(c, 0x01);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_ret(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // mov rsp, rbp
+ emit(c, 0x48);
+ emit(c, 0x89);
+ emit(c, 0xec);
+ // pop rbp
+ emit(c, 0x5d);
+ // ret
+ emit(c, 0xc3);
+}
+
+emit_call(c: *assembler, n: int) {
+ // pop rax
+ emit(c, 0x58);
+ // call rax
+ emit(c, 0xff);
+ emit(c, 0xd0);
+ // add rsp, 8*(n+1)
+ emit_pop(c, n);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_gt(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // setg al
+ emit(c, 0x0f);
+ emit(c, 0x9f);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_lt(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // setl al
+ emit(c, 0x0f);
+ emit(c, 0x9c);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_ge(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // setge al
+ emit(c, 0x0f);
+ emit(c, 0x9d);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_le(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // setle al
+ emit(c, 0x0f);
+ emit(c, 0x9e);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_eq(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // sete al
+ emit(c, 0x0f);
+ emit(c, 0x94);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_ne(c: *assembler) {
+ // pop rdx
+ emit(c, 0x5a);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rax, rax
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xc0);
+ // cmp rdx, rcx
+ emit(c, 0x48);
+ emit(c, 0x39);
+ emit(c, 0xca);
+ // setne al
+ emit(c, 0x0f);
+ emit(c, 0x95);
+ emit(c, 0xc0);
+ // mov rax
+ emit(c, 0x50);
+}
+
+emit_sub(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rdx
+ emit(c, 0x5a);
+ // add rax, rdx
+ emit(c, 0x48);
+ emit(c, 0x29);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_mul(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rcx
+ emit(c, 0x59);
+ // mul rcx
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xe1);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_div(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rdx, rdx
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xd2);
+ // test rax, rax
+ emit(c, 0x48);
+ emit(c, 0x85);
+ emit(c, 0xc0);
+ // sets dl
+ emit(c, 0x0f);
+ emit(c, 0x98);
+ emit(c, 0xc2);
+ // neg rdx
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xda);
+ // idiv rcx
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xf9);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_mod(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rcx
+ emit(c, 0x59);
+ // xor rdx, rdx
+ emit(c, 0x48);
+ emit(c, 0x31);
+ emit(c, 0xd2);
+ // test rax, rax
+ emit(c, 0x48);
+ emit(c, 0x85);
+ emit(c, 0xc0);
+ // sets dl
+ emit(c, 0x0f);
+ emit(c, 0x98);
+ emit(c, 0xc2);
+ // neg rdx
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xda);
+ // idiv rcx
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xf9);
+ // push rdx
+ emit(c, 0x52);
+}
+
+emit_lsh(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rcx
+ emit(c, 0x59);
+ // shl rax, cl
+ emit(c, 0x48);
+ emit(c, 0xd3);
+ emit(c, 0xe0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_rsh(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // pop rcx
+ emit(c, 0x59);
+ // shr rax, cl
+ emit(c, 0x48);
+ emit(c, 0xd3);
+ emit(c, 0xe8);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_not(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // neg rax
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xd0);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_neg(c: *assembler) {
+ // pop rax
+ emit(c, 0x58);
+ // neg rax
+ emit(c, 0x48);
+ emit(c, 0xf7);
+ emit(c, 0xd8);
+ // push rax
+ emit(c, 0x50);
+}
+
+emit_syscall(c: *assembler) {
+ // mov rax, [rbp + 16]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x45);
+ emit(c, 0x10);
+ // mov rdi, [rbp + 24]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x7d);
+ emit(c, 0x18);
+ // mov rsi, [rbp + 32]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x75);
+ emit(c, 0x20);
+ // mov rdx, [rbp + 40]
+ emit(c, 0x48);
+ emit(c, 0x8b);
+ emit(c, 0x55);
+ emit(c, 0x28);
+ // mov r10, [rbp + 48]
+ emit(c, 0x4c);
+ emit(c, 0x8b);
+ emit(c, 0x55);
+ emit(c, 0x30);
+ // mov r8, [rbp + 56]
+ emit(c, 0x4c);
+ emit(c, 0x8b);
+ emit(c, 0x45);
+ emit(c, 0x38);
+ // mov r9, [rbp + 64]
+ emit(c, 0x4c);
+ emit(c, 0x8b);
+ emit(c, 0x4d);
+ emit(c, 0x40);
+ // syscall
+ emit(c, 0x0f);
+ emit(c, 0x05);
+ // push rax
+ emit(c, 0x50);
+}
+
+writeout(c: *assembler, start: *label) {
+ var b: *chunk;
+ var i: int;
+ var text_size: int;
+ var load_addr: int;
+ var entry: int;
+
+ load_addr = 0x100000;
+ text_size = c.at;
+
+ if (!start || !start.fixed) {
+ die("_start is not defined");
+ }
+
+ entry = load_addr + start.at + 128;
+ text_size = text_size + 128;
+
+ // magic
+ putchar(c, 0x7f);
+ putchar(c, 'E');
+ putchar(c, 'L');
+ putchar(c, 'F');
+
+ // class
+ putchar(c, 2);
+
+ // endian
+ putchar(c, 1);
+
+ // version
+ putchar(c, 1);
+
+ // abi
+ putchar(c, 0);
+
+ // abi version
+ putchar(c, 0);
+
+ // padding
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // type
+ putchar(c, 2);
+ putchar(c, 0);
+
+ // machine
+ putchar(c, 62);
+ putchar(c, 0);
+
+ // version
+ putchar(c, 1);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // entry point
+ putchar(c, entry);
+ putchar(c, entry >> 8);
+ putchar(c, entry >> 16);
+ putchar(c, entry >> 24);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phoff
+ putchar(c, 64);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // shoff
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // flags
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // ehsize
+ putchar(c, 64);
+ putchar(c, 0);
+
+ // phentsize
+ putchar(c, 56);
+ putchar(c, 0);
+
+ // phnum
+ putchar(c, 1);
+ putchar(c, 0);
+
+ // shentsize
+ putchar(c, 64);
+ putchar(c, 0);
+
+ // shnum
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // shstrndx
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].type
+ putchar(c, 1);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].flags
+ putchar(c, 5);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].offset
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].vaddr
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0x10);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].paddr
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].filesize
+ putchar(c, text_size);
+ putchar(c, text_size >> 8);
+ putchar(c, text_size >> 16);
+ putchar(c, text_size >> 24);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].memsize
+ putchar(c, text_size);
+ putchar(c, text_size >> 8);
+ putchar(c, text_size >> 16);
+ putchar(c, text_size >> 24);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // phdr[0].align
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+ putchar(c, 0);
+
+ // nop sled
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+ putchar(c, 0x90);
+
+ b = c.text;
+ loop {
+ if (!b) {
+ break;
+ }
+ i = 0;
+ loop {
+ if (i >= b.fill) {
+ break;
+ }
+ putchar(c, b.buf[i]: int);
+ i = i + 1;
+ }
+ b = b.next;
+ }
+}
diff --git a/buildlex.sh b/buildlex.sh
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-set -ue
-
-: ${CC:=./cc2}
-
-${CC} < lex.c > lex
-chmod +x lex
-
-./lex < cc3.l
diff --git a/cc0.c b/cc0.c
@@ -1999,7 +1999,7 @@ decl(void)
// program := func
// | func program
struct node *
-program(void)
+program(struct node *p)
{
struct node *n;
struct node *e;
@@ -2007,7 +2007,7 @@ program(void)
d = decl();
if (!d) {
- return 0;
+ return p;
}
e = mknode(N_PROGRAM, d, 0);
@@ -2020,6 +2020,7 @@ program(void)
die("expected eof");
}
+ e->b = p;
return n;
}
@@ -4344,6 +4345,14 @@ add_stdlib(void)
// syscall
emit(0x0f);
emit(0x05);
+ // push rax
+ emit(0x50);
+ // pop rax
+ emit(0x58);
+ // mov rsp, rbp
+ emit(0x48);
+ emit(0x89);
+ emit(0xec);
// pop rbp
emit(0x5d);
// ret
@@ -4795,6 +4804,7 @@ main(int argc, char **argv)
// Setup the compiler
setup();
+ p = 0;
i = 1;
while (1) {
if (i >= argc) {
@@ -4816,19 +4826,19 @@ main(int argc, char **argv)
open_source((unsigned char *)argv[i]);
// Parse the program
- p = program();
-
- // Verify types
- typecheck(p);
-
- // Generate code
- translate(p);
+ p = program(p);
close_source();
i = i + 1;
}
+ // Verify types
+ typecheck(p);
+
+ // Generate code
+ translate(p);
+
if (fout == stdout) {
open_output((unsigned char *)"a.out");
}
diff --git a/cc1.c b/cc1.c
@@ -1,49 +1,3 @@
-syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int;
-
-struct page {
- ptr: *byte;
- fill: int;
- size: int;
-}
-
-struct node {
- kind: int;
- a: *node;
- b: *node;
- filename: *byte;
- lineno: int;
- colno: int;
- n: int;
- s: *byte;
- t: *type;
-}
-
-struct type {
- kind: int;
- st: *decl;
- val: *type;
- arg: *type;
-}
-
-struct fixup {
- next: *fixup;
- ptr: *byte;
- at: int;
-}
-
-struct label {
- fix: *fixup;
- at: int;
- fixed: int;
-}
-
-struct chunk {
- next: *chunk;
- buf: *byte;
- fill: int;
- cap: int;
-}
-
struct decl {
name: *byte;
member_name: *byte;
@@ -81,7 +35,7 @@ struct decl {
struct compiler {
// Allocator
- page: *page;
+ a: *alloc;
// Lexer
fdin: int;
@@ -95,2178 +49,54 @@ struct compiler {
tmax: int;
// Assembler
- fdout: int;
- at: int;
- text: *chunk;
- text_end: *chunk;
+ as: *assembler;
// Namespace
decls: *decl;
}
-enum {
- T_EOF,
- T_IDENT,
- T_NUM,
- T_HEX,
- T_STR,
- T_CHAR,
- T_LPAR,
- T_RPAR,
- T_LBRA,
- T_RBRA,
- T_COMMA,
- T_SEMI,
- T_COLON,
- T_STAR,
- T_DOT,
- T_NOT,
- T_ASSIGN,
- T_AMP,
- T_OR,
- T_XOR,
- T_LT,
- T_GT,
- T_LE,
- T_GE,
- T_EQ,
- T_NE,
- T_ADD,
- T_SUB,
- T_LSH,
- T_RSH,
- T_BANG,
- T_BOR,
- T_BAND,
- T_LSQ,
- T_RSQ,
- T_DIV,
- T_MOD,
-}
-
-enum {
- N_IDENT,
- N_NUM,
- N_CHAR,
- N_STR,
- N_STMTLIST,
- N_EXPRLIST,
- N_CALL,
- N_DOT,
- N_ARGLIST,
- N_FUNC,
- N_ARGDECL,
- N_FUNCDECL,
- N_PROGRAM,
- N_FUNCTYPE,
- N_PTRTYPE,
- N_STRUCT,
- N_MEMBERDECL,
- N_MEMBERLIST,
- N_CONDLIST,
- N_COND,
- N_ENUM,
- N_ENUMLIST,
- N_LOOP,
- N_BREAK,
- N_CONTINUE,
- N_RETURN,
- N_VARDECL,
- N_LABEL,
- N_GOTO,
- N_ASSIGN,
- N_SIZEOF,
- N_REF,
- N_DEREF,
- N_CAST,
- N_INDEX,
- N_LT,
- N_GT,
- N_LE,
- N_GE,
- N_EQ,
- N_NE,
- N_ADD,
- N_SUB,
- N_MUL,
- N_LSH,
- N_RSH,
- N_BNOT,
- N_BOR,
- N_BAND,
- N_AND,
- N_OR,
- N_XOR,
- N_NOT,
- N_POS,
- N_NEG,
- N_DIV,
- N_MOD,
-}
-
-enum {
- TY_VOID,
- TY_INT,
- TY_BYTE,
- TY_PTR,
- TY_ARG,
- TY_FUNC,
- TY_STRUCT,
-}
-
-exit(n: int) {
- syscall(60, n, 0, 0, 0, 0, 0);
-}
-
-_start(argc: int, argv: **byte, envp: **byte) {
- main(argc, argv, envp);
- exit(0);
-}
-
-read(fd: int, buf: *byte, n: int): int {
- return syscall(0, fd, buf: int, n, 0, 0, 0);
-}
-
-write(fd: int, buf: *byte, n: int): int {
- return syscall(1, fd, buf: int, n, 0, 0, 0);
-}
-
-open(name: *byte, flags: int, mode: int): int {
- return syscall(2, name: int, flags, mode, 0, 0, 0);
-}
-
-lseek(fd: int, offset: int, whence: int): int {
- return syscall(8, fd, offset, whence, 0, 0, 0);
-}
-
-close(fd: int): int {
- return syscall(3, fd, 0, 0, 0, 0, 0);
-}
-
-fchmod(fd: int, mode: int): int {
- return syscall(91, fd, mode, 0, 0, 0, 0);
-}
-
-rename(oldname: *byte, newname: *byte): int {
- return syscall(82, oldname: int, newname: int, 0, 0, 0, 0);
-}
-
-unlink(name: *byte): int {
- return syscall(87, name: int, 0, 0, 0, 0, 0);
-}
-
-mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int {
- return syscall(9, addr, len, prot, flags, fd, off);
-}
-
-alloc(c: *compiler, size: int): *byte {
- var page: *page;
- var mret: int;
- var ret: *byte;
- var psize: int;
-
- if (size < 0) {
- die(c, "invalid alloc");
- }
-
- if (size >= 2048) {
- size = size + 4095;
- size = size & ~4095;
- mret = mmap(0, size, 3, 0x22, -1, 0);
- if (mret == -1) {
- die(c, "out of memory");
- }
- ret = mret: *byte;
- return ret;
- }
-
- page = c.page;
- if (page) {
- if (size <= page.size - page.fill) {
- mret = page.ptr:int + page.fill;
- page.fill = page.fill + size;
- ret = mret: *byte;
- return ret;
- }
- }
-
- psize = 64 * 1024;
-
- mret = mmap(0, psize, 3, 0x22, -1, 0);
- if (mret == -1) {
- die(c, "out of memory");
- }
-
- page = mret: *page;
- page.ptr = (&page[1]): *byte;
- ret = page.ptr;
- page.size = psize - sizeof(*page);
- page.fill = size;
-
- c.page = page;
-
- return ret;
-}
-
-getchar(c: *compiler): int {
- var b: byte;
- var ret: int;
- ret = read(c.fdin, &b, 1);
- if (ret < 0) {
- exit(3);
- }
- if (ret == 0) {
- return -1;
- }
- return b: int;
-}
-
-putchar(c: *compiler, ch: int) {
- var b: byte;
- var ret: int;
- b = ch: byte;
- ret = write(c.fdout, &b, 1);
- if (ret != 1) {
- exit(3);
- }
-}
-
-strlen(s: *byte): int {
- var ret: int;
- ret = 0;
- loop {
- if (s[ret] == 0:byte) {
- return ret;
- }
- ret = ret + 1;
- }
-}
-
-strcmp(a: *byte, b: *byte): int {
- var i: int;
-
- i = 0;
-
- loop {
- if (a[i] > b[i]) {
- return 1;
- }
-
- if (a[i] < b[i]) {
- return -1;
- }
-
- if (a[i] == 0:byte) {
- return 0;
- }
-
- i = i + 1;
- }
-}
-
-fdputc(fd: int, ch: int) {
- var b: byte;
- var ret: int;
- b = ch: byte;
- ret = write(fd, &b, 1);
- if (ret != 1) {
- exit(3);
- }
-}
-
-fdput(fd: int, msg: *byte) {
- var len: int;
- var ret: int;
- var off: int;
- len = strlen(msg);
- off = 0;
- loop {
- if (off == len) {
- break;
- }
- ret = write(fd, msg, len - off);
- if (ret < 0) {
- exit(3);
- }
- off = off + ret;
- }
-}
-
-fdputh(fd: int, n: int) {
- var c: int;
- var r: int;
-
- r = 0;
- loop {
- if (n == 0) {
- break;
- }
-
- r = (r << 4) + (n & 15);
- n = n >> 4;
- }
-
- n = r;
-
- loop {
- c = n & 15;
- n = n >> 4;
-
- if (c < 10) {
- fdputc(fd, c + '0');
- } else {
- fdputc(fd, c + ('a' - 10));
- }
-
- if (n == 0) {
- break;
- }
- }
-}
-
-fdputd(fd: int, n: int) {
- var a: int;
-
- if (n < 0) {
- fdputc(fd, '-');
- a = -(n % 10);
- n = n / -10;
- } else {
- a = n % 10;
- n = n / 10;
- }
-
- if (n != 0) {
- fdputd(fd, n);
- }
-
- fdputc(fd, '0' + a);
-}
-
show_context(c: *compiler) {
- fdput(2, "on ");
+ fdputs(2, "on ");
if (c.filename) {
- fdput(2, c.filename);
+ fdputs(2, c.filename);
}
- fdput(2, ":");
+ fdputs(2, ":");
fdputd(2, c.lineno);
- fdput(2, ":");
+ fdputs(2, ":");
fdputd(2, c.colno);
- fdput(2, "\n");
-}
-
-die(c: *compiler, msg: *byte) {
- show_context(c);
- fdput(2, "die: ");
- fdput(2, msg);
- fdput(2, "\n");
- exit(1);
-}
-
-open_output(c: *compiler, filename: *byte) {
- var fd: int;
-
- c.filename = filename;
-
- if (c.fdout != 1) {
- die(c, "multiple output files");
- }
-
- unlink(filename);
-
- fd = open(filename, 64 + 1, (7 << 6) + (7 << 3) + 7);
- if (fd < 0) {
- die(c, "failed to open output");
- }
-
- c.fdout = fd;
-}
-
-open_source(c: *compiler, filename: *byte) {
- var fd: int;
-
- c.filename = filename;
- c.nc = 0;
- c.lineno = 1;
- c.colno = 1;
- c.tlen = 0;
- c.tt = 0;
-
- fd = open(filename, 0, 0);
- if (fd < 0) {
- die(c, "failed to open file");
- }
-
- c.fdin = fd;
- c.nc = getchar(c);
-
- feed(c);
-}
-
-close_source(c: *compiler) {
- if (c.fdin != 0) {
- close(c.fdin);
- }
- c.fdin = 0;
-}
-
-comp_setup(c: *compiler) {
- c.page = 0:*page;
-
- c.fdin = 0;
- c.nc = 0;
- c.filename = 0:*byte;
- c.lineno = 1;
- c.colno = 1;
- c.tlen = 0;
- c.tmax = 4096;
- c.token = alloc(c, c.tmax);
- c.tt = 0;
-
- c.fdout = 1;
- c.at = 0;
- c.text = 0:*chunk;
- c.text_end = 0:*chunk;
-
- c.decls = 0:*decl;
-}
-
-feedc(c: *compiler) {
- c.nc = getchar(c);
- if (c.nc == '\n') {
- c.lineno = c.lineno + 1;
- c.colno = 0;
- }
- c.colno = c.colno + 1;
-}
-
-tappend(c: *compiler) {
- c.token[c.tlen] = c.nc:byte;
- c.tlen = c.tlen + 1;
- if (c.tlen == c.tmax) {
- die(c, "identifier too long");
- }
- c.token[c.tlen] = 0:byte;
- feedc(c);
-}
-
-feed(c: *compiler) {
- c.tlen = 0;
- c.token[0] = 0:byte;
-
- loop {
- if (c.nc == -1) {
- // Reached the end of input
- c.tt = T_EOF;
- return;
- } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r') {
- // Whitespace
- feedc(c);
- } else if (c.nc == '\n') {
- // Line end
- feedc(c);
- } else if (c.nc == '/') {
- // Comment
- feedc(c);
- if (c.nc == '/') {
- // Read until the end of the comment
- loop {
- if (c.nc == '\n' || c.nc == -1) {
- break;
- }
- feedc(c);
- }
- } else {
- c.tt = T_DIV;
- return;
- }
-
- } else {
- // Start of a real token
- break;
- }
- }
-
- // Identifier
- if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') {
- feed_ident(c);
- return;
- }
-
- // String
- if (c.nc == '"') {
- feed_str(c);
- return;
- }
-
- // Character
- if (c.nc == '\'') {
- feed_char(c);
- return;
- }
-
- // Number
- if (c.nc >= '0' && c.nc <= '9') {
- feed_num(c);
- return;
- }
-
- // Single character tokens
- if (c.nc == '(') {
- c.tt = T_LPAR;
- feedc(c);
- } else if (c.nc == ')') {
- c.tt = T_RPAR;
- feedc(c);
- } else if (c.nc == '{') {
- c.tt = T_LBRA;
- feedc(c);
- } else if (c.nc == '}') {
- c.tt = T_RBRA;
- feedc(c);
- } else if (c.nc == ',') {
- c.tt = T_COMMA;
- feedc(c);
- } else if (c.nc == ';') {
- c.tt = T_SEMI;
- feedc(c);
- } else if (c.nc == ':') {
- c.tt = T_COLON;
- feedc(c);
- } else if (c.nc == '*') {
- c.tt = T_STAR;
- feedc(c);
- } else if (c.nc == '.') {
- c.tt = T_DOT;
- feedc(c);
- } else if (c.nc == '=') {
- c.tt = T_ASSIGN;
- feedc(c);
- if (c.nc == '=') {
- c.tt = T_EQ;
- feedc(c);
- }
- } else if (c.nc == '&') {
- c.tt = T_AMP;
- feedc(c);
- if (c.nc == '&') {
- c.tt = T_BAND;
- feedc(c);
- }
- } else if (c.nc == '~') {
- c.tt = T_NOT;
- feedc(c);
- } else if (c.nc == '|') {
- c.tt = T_OR;
- feedc(c);
- if (c.nc == '|') {
- c.tt = T_BOR;
- feedc(c);
- }
- } else if (c.nc == '^') {
- c.tt = T_XOR;
- feedc(c);
- } else if (c.nc == '!') {
- c.tt = T_BANG;
- feedc(c);
- if (c.nc == '=') {
- c.tt = T_NE;
- feedc(c);
- }
- } else if (c.nc == '<') {
- c.tt = T_LT;
- feedc(c);
- if (c.nc == '<') {
- c.tt = T_LSH;
- feedc(c);
- } else if (c.nc == '=') {
- c.tt = T_LE;
- feedc(c);
- }
- } else if (c.nc == '>') {
- c.tt = T_GT;
- feedc(c);
- if (c.nc == '>') {
- c.tt = T_RSH;
- feedc(c);
- } else if (c.nc == '=') {
- c.tt = T_GE;
- feedc(c);
- }
- } else if (c.nc == '[') {
- c.tt = T_LSQ;
- feedc(c);
- } else if (c.nc == ']') {
- c.tt = T_RSQ;
- feedc(c);
- } else if (c.nc == '+') {
- c.tt = T_ADD;
- feedc(c);
- } else if (c.nc == '-') {
- c.tt = T_SUB;
- feedc(c);
- } else if (c.nc == '%') {
- c.tt = T_MOD;
- feedc(c);
- } else {
- die(c, "invalid char");
- }
-}
-
-feed_ident(c: *compiler) {
- c.tt = T_IDENT;
- loop {
- if (!((c.nc >= 'a' && c.nc <= 'z') ||
- (c.nc >= 'A' && c.nc <= 'Z') ||
- (c.nc >= '0' && c.nc <= '9') ||
- c.nc == '_')) {
- break;
- }
- tappend(c);
- }
-}
-
-hexdig(c: *compiler): int {
- if (c.nc >= '0' && c.nc <= '9') {
- return c.nc - '0';
- }
-
- if (c.nc >= 'A' && c.nc <= 'F') {
- return (c.nc - 'F') + 10;
- }
-
- if (c.nc >= 'a' && c.nc <= 'f') {
- return (c.nc - 'a') + 10;
- }
-
- die(c, "invalid hex digit");
- return 0;
-}
-
-feed_escape(c: *compiler) {
- var hex: int;
-
- // backslash
- feedc(c);
-
- if (c.nc == 't') {
- c.nc = '\t';
- } else if (c.nc == 'r') {
- c.nc = '\r';
- } else if (c.nc == 'n') {
- c.nc = '\n';
- } else if (c.nc == 'x') {
- c.nc = getchar(c);
- hex = hexdig(c) * 16;
-
- c.nc = getchar(c);
- hex = hex + hexdig(c);
-
- c.nc = hex;
- } else if (c.nc != '\\' && c.nc != '\'' && c.nc != '"') {
- die(c, "invalid escape");
- }
-}
-
-feed_str(c: *compiler) {
- c.tt = T_STR;
-
- // quote
- feedc(c);
-
- loop {
- if (c.nc == '"') {
- feedc(c);
- return;
- }
-
- if (c.nc == -1 || c.nc == 0 || c.nc == '\n') {
- die(c, "invalid char in string");
- }
-
- if (c.tlen == c.tmax) {
- die(c, "string too long");
- }
-
- if (c.nc == '\\') {
- feed_escape(c);
- }
-
- tappend(c);
- }
-}
-
-feed_char(c: *compiler) {
- c.tt = T_CHAR;
-
- // quote
- feedc(c);
-
- if (c.nc == 0 || c.nc == -1 || c.nc == '\'' || c.nc == '\n') {
- die(c, "invalid char");
- }
-
- if (c.nc == '\\') {
- feed_escape(c);
- }
-
- tappend(c);
-
- if (c.nc != '\'') {
- die(c, "expected '");
- }
-
- feedc(c);
-}
-
-feed_hex(c: *compiler) {
- c.tt = T_HEX;
-
- loop {
- if (!((c.nc >= '0' && c.nc <= '9')
- || (c.nc >= 'a' && c.nc <= 'f')
- || (c.nc >= 'A' && c.nc <= 'F'))) {
- break;
- }
-
- tappend(c);
- }
-
- if (c.tlen == 0) {
- die(c, "expected hex");
- }
-}
-
-feed_num(c: *compiler) {
- c.tt = T_NUM;
-
- if (c.nc == '0') {
- tappend(c);
-
- if (c.nc == 'x') {
- feedc(c);
- feed_hex(c);
- return;
- }
- }
-
- loop {
- if (!(c.nc >= '0' && c.nc <= '9')) {
- break;
- }
-
- tappend(c);
- }
-}
-
-mknode(c: *compiler, kind: int, a: *node, b: *node): *node {
- var ret: *node;
- ret = alloc(c, sizeof(*ret)):*node;
- ret.kind = kind;
- ret.a = a;
- ret.b = b;
- ret.filename = c.filename;
- ret.lineno = c.lineno;
- ret.colno = c.colno;
- ret.n = 0;
- ret.s = 0:*byte;
- ret.t = 0:*type;
- return ret;
-}
-
-mknode0(c: *compiler, kind: int): *node {
- return mknode(c, kind, 0:*node, 0:*node);
-}
-
-mknode1(c: *compiler, kind: int, a: *node): *node {
- return mknode(c, kind, a, 0:*node);
-}
-
-// Copy the current token
-intern(c: *compiler): *byte {
- var ret: *byte;
- var i: int;
-
- ret = alloc(c, c.tlen + 1);
-
- i = 0;
- loop {
- if (i == c.tlen) {
- ret[i] = 0:byte;
- return ret;
- }
-
- ret[i] = c.token[i];
-
- i = i + 1;
- }
-}
-
-// ident := IDENT
-parse_ident(c: *compiler): *node {
- var n: *node;
-
- if (c.tt != T_IDENT) {
- return 0:*node;
- }
-
- n = mknode0(c, N_IDENT);
- n.s = intern(c);
- feed(c);
-
- return n;
-}
-
-parse_hex(c: *compiler): *node {
- var n: *node;
- var x: int;
- var d: int;
- var i: int;
-
- x = 0;
- i = 0;
- loop {
- if (i == c.tlen) {
- break;
- }
-
- d = c.token[i]:int;
-
- if (d >= '0' && d <= '9') {
- d = d - '0';
- } else if (d >= 'a' && d <= 'f') {
- d = (d - 'a') + 10;
- } else {
- d = (d - 'A') + 10;
- }
-
- x = x * 16;
- x = x + d;
- i = i + 1;
-
- if (x > 0x7fffffff) {
- die(c, "overflow");
- }
- }
-
- n = mknode0(c, N_NUM);
- n.n = x;
- feed(c);
-
- return n;
-}
-
-// num := NUM
-parse_num(c: *compiler): *node {
- var n: *node;
- var x: int;
- var d: int;
- var i: int;
-
- if (c.tt == T_HEX) {
- return parse_hex(c);
- }
-
- if (c.tt != T_NUM) {
- return 0:*node;
- }
-
- x = 0;
- i = 0;
- loop {
- if (i == c.tlen) {
- break;
- }
-
- d = (c.token[i]:int) - '0';
-
- x = x * 10;
-
- x = x + d;
- i = i + 1;
-
- if (x > 0x7fffffff) {
- die(c, "overflow");
- }
- }
-
- n = mknode0(c, N_NUM);
- n.n = x;
- feed(c);
-
- return n;
-}
-
-// chr := CHAR
-parse_chr(c: *compiler): *node {
- var n: *node;
-
- if (c.tt != T_CHAR) {
- return 0:*node;
- }
-
- n = mknode0(c, N_CHAR);
- n.n = c.token[0]:int;
- feed(c);
-
- return n;
-}
-
-// str := STR
-parse_str(c: *compiler): *node {
- var n: *node;
-
- if (c.tt != T_STR) {
- return 0:*node;
- }
-
- n = mknode0(c, N_STR);
- n.s = intern(c);
- feed(c);
-
- return n;
-}
-
-// primary := ident
-// | num
-// | str
-// | chr
-// | '(' expr ')'
-parse_primary(c: *compiler): *node {
- var n: *node;
-
- n = parse_ident(c);
- if (n) {
- return n;
- }
-
- n = parse_num(c);
- if (n) {
- return n;
- }
-
- n = parse_str(c);
- if (n) {
- return n;
- }
-
- n = parse_chr(c);
- if (n) {
- return n;
- }
-
- if (c.tt == T_LPAR) {
- feed(c);
-
- n = parse_expr(c);
- if (!n) {
- die(c, "expected expr");
- }
-
- if (c.tt != T_RPAR) {
- die(c, "expected )");
- }
- feed(c);
-
- return n;
- }
-
- return 0:*node;
-}
-
-// expr_list := expr
-// | expr ',' expr_list
-parse_expr_list(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
-
- a = parse_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- e = mknode1(c, N_EXPRLIST, a);
- n = e;
-
- loop {
- if (c.tt != T_COMMA) {
- return n;
- }
- feed(c);
-
- a = parse_expr(c);
- if (!a) {
- die(c, "expected expr");
- }
-
- e.b = mknode1(c, N_EXPRLIST, a);
- e = e.b;
- }
-}
-
-// post_expr := primary
-// | 'sizeof' '(' expr ')'
-// | post_expr '[' expr ']'
-// | post_expr '(' expr_list ')'
-// | post_expr '.' ident
-// | post_expr ':' type
-parse_post_expr(c: *compiler): *node {
- var n: *node;
- var b: *node;
-
- if (c.tt == T_IDENT && !strcmp(c.token, "sizeof")) {
- feed(c);
-
- if (c.tt != T_LPAR) {
- die(c, "expected (");
- }
- feed(c);
-
- n = parse_expr(c);
- if (!n) {
- die(c, "expected expr");
- }
-
- if (c.tt != T_RPAR) {
- die(c, "expected )");
- }
- feed(c);
-
- return mknode1(c, N_SIZEOF, n);
- }
-
- n = parse_primary(c);
- if (!n) {
- return 0:*node;
- }
-
- loop {
- if (c.tt == T_LSQ) {
- feed(c);
-
- b = parse_expr(c);
-
- if (c.tt != T_RSQ) {
- die(c, "expected ]");
- }
- feed(c);
-
- n = mknode(c, N_INDEX, n, b);
- } else if (c.tt == T_LPAR) {
- feed(c);
-
- b = parse_expr_list(c);
-
- if (c.tt != T_RPAR) {
- die(c, "expected )");
- }
- feed(c);
-
- n = mknode(c, N_CALL, n, b);
- } else if (c.tt == T_DOT) {
- feed(c);
-
- b = parse_ident(c);
- if (!b) {
- die(c, "expected ident");
- }
-
- n = mknode(c, N_DOT, n, b);
- } else if (c.tt == T_COLON) {
- feed(c);
-
- b = parse_type(c);
- if (!b) {
- die(c, "expected type");
- }
-
- n = mknode(c, N_CAST, n, b);
- } else {
- return n;
- }
- }
-}
-
-// unary_expr := post_expr
-// | '&' unary_expr
-// | '*' unary_expr
-// | '+' unary_expr
-// | '-' unary_expr
-// | '~' unary_expr
-// | '!' unary_expr
-parse_unary_expr(c: *compiler): *node {
- var n: *node;
-
- if (c.tt == T_AMP) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_REF, n);
- }
-
- if (c.tt == T_STAR) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_DEREF, n);
- }
-
- if (c.tt == T_ADD) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_POS, n);
- }
-
- if (c.tt == T_SUB) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_NEG, n);
- }
-
- if (c.tt == T_NOT) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_NOT, n);
- }
-
- if (c.tt == T_BANG) {
- feed(c);
-
- n = parse_unary_expr(c);
- if (!n) {
- die(c, "expected unary_expr");
- }
-
- return mknode1(c, N_BNOT, n);
- }
-
- return parse_post_expr(c);
-}
-
-
-// shift_expr := unary_expr
-// | shift_expr '<<' unary_expr
-// | shift_expr '>>' unary_expr
-parse_shift_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_unary_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- loop {
- if (c.tt == T_LSH) {
- feed(c);
-
- b = parse_unary_expr(c);
- if (!b) {
- die(c, "expected unary_expr");
- }
-
- a = mknode(c, N_LSH, a, b);
- } else if (c.tt == T_RSH) {
- feed(c);
-
- b = parse_unary_expr(c);
- if (!b) {
- die(c, "expected unary_expr");
- }
-
- a = mknode(c, N_RSH, a, b);
- } else {
- return a;
- }
- }
-}
-
-// mul_expr := shift_expr
-// | mul_expr '*' shift_expr
-// | mul_expr '/' shift_expr
-// | mul_expr '%' shift_expr
-// | mul_expr '&' shift_expr
-parse_mul_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_shift_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- loop {
- if (c.tt == T_STAR) {
- feed(c);
-
- b = parse_shift_expr(c);
- if (!b) {
- die(c, "expected shift_expr");
- }
-
- a = mknode(c, N_MUL, a, b);
- } else if (c.tt == T_DIV) {
- feed(c);
-
- b = parse_shift_expr(c);
- if (!b) {
- die(c, "expected shift_expr");
- }
-
- a = mknode(c, N_DIV, a, b);
- } else if (c.tt == T_MOD) {
- feed(c);
-
- b = parse_shift_expr(c);
- if (!b) {
- die(c, "expected shift_expr");
- }
-
- a = mknode(c, N_MOD, a, b);
- } else if (c.tt == T_AMP) {
- feed(c);
-
- b = parse_shift_expr(c);
- if (!b) {
- die(c, "expected shift_expr");
- }
-
- a = mknode(c, N_AND, a, b);
- } else {
- return a;
- }
- }
-}
-
-// add_expr := mul_expr
-// | add_expr '+' mul_expr
-// | add_expr '-' mul_expr
-// | add_expr '|' mul_expr
-// | add_expr '^' mul_expr
-parse_add_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_mul_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- loop {
- if (c.tt == T_ADD) {
- feed(c);
-
- b = parse_mul_expr(c);
- if (!b) {
- die(c, "expected mul_expr");
- }
-
- a = mknode(c, N_ADD, a, b);
- } else if (c.tt == T_SUB) {
- feed(c);
-
- b = parse_mul_expr(c);
- if (!b) {
- die(c, "expected mul_expr");
- }
-
- a = mknode(c, N_SUB, a, b);
- } else if (c.tt == T_OR) {
- feed(c);
-
- b = parse_mul_expr(c);
- if (!b) {
- die(c, "expected mul_expr");
- }
-
- a = mknode(c, N_OR, a, b);
- } else if (c.tt == T_XOR) {
- feed(c);
-
- b = parse_mul_expr(c);
- if (!b) {
- die(c, "expected mul_expr");
- }
-
- a = mknode(c, N_XOR, a, b);
- } else {
- return a;
- }
- }
-}
-
-// comp_expr := add_expr
-// | add_expr '<' add_expr
-// | add_expr '>' add_expr
-// | add_expr '<=' add_expr
-// | add_expr '>=' add_expr
-// | add_expr '==' add_expr
-// | add_expr '!=' add_expr
-parse_comp_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_add_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt == T_LT) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_LT, a, b);
- }
-
- if (c.tt == T_GT) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_GT, a, b);
- }
-
- if (c.tt == T_LE) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_LE, a, b);
- }
-
- if (c.tt == T_GE) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_GE, a, b);
- }
-
- if (c.tt == T_EQ) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_EQ, a, b);
- }
-
- if (c.tt == T_NE) {
- feed(c);
-
- b = parse_add_expr(c);
- if (!b) {
- die(c, "expected add_expr");
- }
-
- return mknode(c, N_NE, a, b);
- }
-
- return a;
-}
-
-// bool_expr := bool_expr
-// | bool_expr '&&' comp_expr
-// | bool_expr '||' comp_expr
-parse_bool_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_comp_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt == T_BAND) {
- feed(c);
-
- b = parse_bool_expr(c);
- if (!b) {
- die(c, "expected bool_expr");
- }
-
- return mknode(c, N_BAND, a, b);
- }
-
- if (c.tt == T_BOR) {
- feed(c);
-
- b = parse_bool_expr(c);
- if (!b) {
- die(c, "expected bool_expr");
- }
-
- return mknode(c, N_BOR, a, b);
- }
-
- return a;
-}
-
-// expr := bool_expr
-// | bool_expr '=' expr
-parse_expr(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_bool_expr(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt == T_ASSIGN) {
- feed(c);
-
- b = parse_expr(c);
- if (!b) {
- die(c, "expected expr");
- }
-
- return mknode(c, N_ASSIGN, a, b);
- }
-
- return a;
-}
-
-// if_stmt := 'if' expr '{' stmt_list '}'
-// | 'if' expr '{' stmt_list '}' 'else' if_stmt
-// | 'if' expr '{' stmt_list '}' 'else' '{' stmt_list '}'
-parse_if_stmt(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
- var b: *node;
-
- if (c.tt != T_IDENT || strcmp(c.token, "if")) {
- return 0:*node;
- }
- feed(c);
-
- n = mknode0(c, N_CONDLIST);
- e = n;
-
- loop {
- a = parse_expr(c);
- if !a {
- die(c, "expected expr");
- }
-
- if (c.tt != T_LBRA) {
- die(c, "expected {");
- }
- feed(c);
-
- b = parse_stmt_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- e.a = mknode(c, N_COND, a, b);
-
- if (c.tt != T_IDENT || strcmp(c.token, "else")) {
- return n;
- }
- feed(c);
-
- if (c.tt == T_LBRA) {
- feed(c);
-
- b = parse_stmt_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- e.b = mknode1(c, N_CONDLIST, mknode(c, N_COND, 0:*node, b));
-
- return n;
- }
-
- if (c.tt != T_IDENT || strcmp(c.token, "if")) {
- die(c, "expected if");
- }
- feed(c);
-
- e.b = mknode0(c, N_CONDLIST);
- e = e.b;
- }
-}
-
-// loop_stmt := 'loop' '{' stmt_list '}'
-parse_loop_stmt(c: *compiler): *node {
- var a: *node;
-
- if (c.tt != T_IDENT || strcmp(c.token, "loop")) {
- return 0:*node;
- }
- feed(c);
-
- if (c.tt != T_LBRA) {
- die(c, "expected {");
- }
- feed(c);
-
- a = parse_stmt_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- return mknode1(c, N_LOOP, a);
-}
-
-// break_stmt := 'break'
-parse_break_stmt(c: *compiler): *node {
- if (c.tt != T_IDENT || strcmp(c.token, "break")) {
- return 0:*node;
- }
- feed(c);
-
- return mknode0(c, N_BREAK);
-}
-
-// continue_stmt := 'continue'
-parse_continue_stmt(c: *compiler): *node {
- if (c.tt != T_IDENT || strcmp(c.token, "continue")) {
- return 0:*node;
- }
- feed(c);
-
- return mknode0(c, N_CONTINUE);
-}
-
-// return_stmt := 'return'
-// | 'return' expr
-parse_return_stmt(c: *compiler): *node {
- var a: *node;
-
- if (c.tt != T_IDENT || strcmp(c.token, "return")) {
- return 0:*node;
- }
- feed(c);
-
- a = parse_expr(c);
-
- return mknode1(c, N_RETURN, a);
-}
-
-// var_decl := ident ':' type
-parse_var_decl(c: *compiler): *node {
- var n: *node;
- var a: *node;
- var b: *node;
-
- a = parse_ident(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt != T_COLON) {
- die(c, "expected :");
- }
- feed(c);
-
- b = parse_type(c);
- if (!b) {
- die(c, "expected type");
- }
-
- return mknode(c, N_VARDECL, a, b);
-}
-
-// var_stmt := 'var' var_decl
-parse_var_stmt(c: *compiler): *node {
- var a: *node;
-
- if (c.tt != T_IDENT || strcmp(c.token, "var")) {
- return 0:*node;
- }
- feed(c);
-
- a = parse_var_decl(c);
- if (!a) {
- die(c, "expected var_decl");
- }
-
- return a;
-}
-
-// label_stmt := ':' ident
-parse_label_stmt(c: *compiler): *node {
- var a: *node;
-
- if (c.tt != T_COLON) {
- return 0:*node;
- }
- feed(c);
-
- a = parse_ident(c);
- if (!a) {
- die(c, "expected ident");
- }
-
- return mknode1(c, N_LABEL, a);
-}
-
-// goto_stmt := 'goto' ident
-parse_goto_stmt(c: *compiler): *node {
- var a: *node;
-
- if (c.tt != T_IDENT || strcmp(c.token, "goto")) {
- return 0:*node;
- }
- feed(c);
-
- a = parse_ident(c);
- if (!a) {
- die(c, "expected ident");
- }
-
- return mknode1(c, N_GOTO, a);
-}
-
-// stmt := if_stmt
-// | loop_stmt
-// | break_stmt ';'
-// | continue_stmt ';'
-// | return_stmt ';'
-// | var_stmt ';'
-// | label_stmt ';'
-// | goto_stmt ';'
-// | expr ';'
-parse_stmt(c: *compiler): *node {
- var n: *node;
-
- n = parse_if_stmt(c);
- if (n) {
- return n;
- }
-
- n = parse_loop_stmt(c);
- if (n) {
- return n;
- }
-
- n = parse_break_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_return_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_continue_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_var_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_label_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_goto_stmt(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- n = parse_expr(c);
- if (n) {
- if (c.tt != T_SEMI) {
- die(c, "expected ;");
- }
- feed(c);
-
- return n;
- }
-
- return 0:*node;
-}
-
-// stmt_list := stmt
-// | stmt stmt_list
-parse_stmt_list(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
-
- a = parse_stmt(c);
- if (!a) {
- return 0:*node;
- }
-
- e = mknode1(c, N_STMTLIST, a);
- n = e;
-
- loop {
- a = parse_stmt(c);
- if (!a) {
- return n;
- }
-
- e.b = mknode1(c, N_STMTLIST, a);
- e = e.b;
- }
-}
-
-// enum_list := ident
-// | enum_list ',' enum_list
-parse_enum_list(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
-
- a = parse_ident(c);
- if (!a) {
- return 0:*node;
- }
-
- e = mknode1(c, N_ENUMLIST, a);
- n = e;
-
- loop {
- if (c.tt != T_COMMA) {
- return n;
- }
- feed(c);
-
- a = parse_ident(c);
- if (!a) {
- return n;
- }
-
- e.b = mknode1(c, N_ENUMLIST, a);
- e = e.b;
- }
-}
-
-// enum_decl := 'enum' ident '{' enum_list '}'
-parse_enum_decl(c: *compiler): *node {
- var b: *node;
-
- if (c.tt != T_IDENT) {
- return 0:*node;
- }
-
- if (strcmp(c.token, "enum")) {
- return 0:*node;
- }
- feed(c);
-
- if (c.tt != T_LBRA) {
- die(c, "expected {");
- }
- feed(c);
-
- b = parse_enum_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- return mknode(c, N_ENUM, 0:*node, b);
-}
-
-// type := ident
-// | '*' type
-// | '(' type ')'
-// | 'func' func_type
-parse_type(c: *compiler): *node {
- var n: *node;
-
- if (c.tt == T_STAR) {
- feed(c);
-
- n = parse_type(c);
-
- return mknode1(c, N_PTRTYPE, n);
- }
-
- if (c.tt == T_LPAR) {
- feed(c);
-
- n = parse_type(c);
-
- if (c.tt != T_RPAR) {
- die(c, "expected )");
- }
- feed(c);
-
- return n;
- }
-
- if (c.tt == T_IDENT && !strcmp(c.token, "func")) {
- feed(c);
-
- n = parse_func_type(c);
- if (!n) {
- die(c, "expected func_type");
- }
-
- return n;
- }
-
- return parse_ident(c);
-}
-
-// member_decl := ident ':' type
-parse_member_decl(c: *compiler): *node {
- var n: *node;
- var a: *node;
- var b: *node;
-
- a = parse_ident(c);
- if (!a) {
- return 0: *node;
- }
-
- if (c.tt != T_COLON) {
- die(c, "expected :");
- }
- feed(c);
-
- b = parse_type(c);
- if (!b) {
- die(c, "expected type");
- }
-
- return mknode(c, N_MEMBERDECL, a, b);
-}
-
-// member_list := member_decl
-// | member_decl ';' member_list
-parse_member_list(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
-
- a = parse_member_decl(c);
- if (!a) {
- return 0:*node;
- }
-
- e = mknode1(c, N_MEMBERLIST, a);
- n = e;
-
- loop {
- if (c.tt != T_SEMI) {
- return n;
- }
- feed(c);
-
- a = parse_member_decl(c);
- if (!a) {
- return n;
- }
-
- e.b = mknode1(c, N_MEMBERLIST, a);
- e = e.b;
- }
-}
-
-// struct_decl := 'struct' ident '{' member_list '}'
-parse_struct_decl(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- if (c.tt != T_IDENT) {
- return 0:*node;
- }
-
- if (strcmp(c.token, "struct")) {
- return 0:*node;
- }
- feed(c);
-
- a = parse_ident(c);
- if (!a) {
- die(c, "expected ident");
- }
-
- if (c.tt != T_LBRA) {
- die(c, "expected {");
- }
- feed(c);
-
- b = parse_member_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- return mknode(c, N_STRUCT, a, b);
-}
-
-// arg_decl := ':' type
-// ident ':' type
-parse_arg_decl(c: *compiler): *node {
- var n: *node;
- var a: *node;
- var b: *node;
-
- a = parse_ident(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt != T_COLON) {
- die(c, "expected :");
- }
- feed(c);
-
- b = parse_type(c);
- if (!b) {
- die(c, "expected type");
- }
-
- return mknode(c, N_ARGDECL, a, b);
-}
-
-// arg_list := arg_decl
-// | arg_decl ',' arg_list
-parse_arg_list(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var a: *node;
-
- a = parse_arg_decl(c);
- if (!a) {
- return 0:*node;
- }
-
- e = mknode1(c, N_ARGLIST, a);
- n = e;
-
- loop {
- if (c.tt != T_COMMA) {
- return n;
- }
- feed(c);
-
- a = parse_arg_decl(c);
- if (!a) {
- die(c, "expected identifier");
- }
-
- e.b = mknode1(c, N_ARGLIST, a);
- e = e.b;
- }
-}
-
-// func_type := '(' ')' ':' type
-// | '(' arg_list ')' ':' type
-// | '(' ')'
-// | '(' arg_list ')'
-parse_func_type(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- if (c.tt != T_LPAR) {
- return 0: *node;
- }
- feed(c);
-
- a = parse_arg_list(c);
-
- if (c.tt != T_RPAR) {
- die(c, "expected )");
- }
- feed(c);
-
- if (c.tt != T_COLON) {
- return mknode1(c, N_FUNCTYPE, a);
- }
- feed(c);
-
- b = parse_type(c);
- if (!b) {
- die(c, "expected type");
- }
-
- return mknode(c, N_FUNCTYPE, a, b);
-}
-
-// func_decl := ident func_type
-parse_func_decl(c: *compiler): *node {
- var a: *node;
- var b: *node;
-
- a = parse_ident(c);
- if (!a) {
- return 0:*node;
- }
-
- b = parse_func_type(c);
- if (!b) {
- die(c, "expected func_type");
- }
-
- return mknode(c, N_FUNCDECL, a, b);
-}
-
-// func := func_decl '{' stmt_list '}'
-// | func_decl ';'
-parse_func(c: *compiler): *node {
- var n: *node;
- var a: *node;
- var b: *node;
-
- a = parse_func_decl(c);
- if (!a) {
- return 0:*node;
- }
-
- if (c.tt == T_SEMI) {
- feed(c);
- return a;
- }
-
- if (c.tt != T_LBRA) {
- die(c, "expected {");
- }
- feed(c);
-
- b = parse_stmt_list(c);
-
- if (c.tt != T_RBRA) {
- die(c, "expected }");
- }
- feed(c);
-
- return mknode(c, N_FUNC, a, b);
-}
-
-// decl := enum_decl
-// | struct_decl
-// | func
-parse_decl(c: *compiler): *node {
- var n: *node;
-
- n = parse_enum_decl(c);
- if (n) {
- return n;
- }
-
- n = parse_struct_decl(c);
- if (n) {
- return n;
- }
+ fdputs(2, "\n");
+}
- return parse_func(c);
+cdie(c: *compiler, msg: *byte) {
+ show_context(c);
+ fdputs(2, "cdie: ");
+ fdputs(2, msg);
+ fdputs(2, "\n");
+ exit(1);
}
-// program := decl
-// | decl program
-parse_program(c: *compiler): *node {
- var n: *node;
- var e: *node;
- var d: *node;
+comp_setup(a: *alloc): *compiler {
+ var c: *compiler;
- d = parse_decl(c);
- if (!d) {
- if (c.tt != T_EOF) {
- die(c, "expected eof");
- }
- return 0:*node;
- }
+ c = alloc(a, sizeof(*c)): *compiler;
- e = mknode1(c, N_PROGRAM, d);
- n = e;
+ c.a = a;
- loop {
- d = parse_decl(c);
- if (!d) {
- if (c.tt != T_EOF) {
- die(c, "expected eof");
- }
+ c.fdin = 0;
+ c.nc = 0;
+ c.filename = 0:*byte;
+ c.lineno = 1;
+ c.colno = 1;
+ c.tlen = 0;
+ c.tmax = 4096;
+ c.token = alloc(c.a, c.tmax);
+ c.tt = 0;
- return n;
- }
+ c.as = setup_assembler(a);
- e.b = mknode1(c, N_PROGRAM, d);
- e = e.b;
- }
+ c.decls = 0:*decl;
+
+ return c;
}
compile(c: *compiler, p: *node) {
@@ -2287,7 +117,7 @@ compile(c: *compiler, p: *node) {
} else if (kind == N_ENUM) {
defenum(c, n.a);
} else if (kind != N_FUNC && kind != N_FUNCDECL) {
- die(c, "invalid decl");
+ cdie(c, "invalid decl");
}
n = n.b;
@@ -2351,7 +181,7 @@ defextern(c: *compiler, n: *node): *decl {
d = find(c, name, 0:*byte, 1);
if (d.func_defined) {
- die(c, "duplicate function");
+ cdie(c, "duplicate function");
}
d.func_defined = 1;
@@ -2375,13 +205,13 @@ defstruct(c: *compiler, n: *node) {
name = n.a.s;
if (!strcmp(name, "int") || !strcmp(name, "byte") || !strcmp(name, "func")) {
- die(c, "reserved word");
+ cdie(c, "reserved word");
}
d = find(c, name, 0:*byte, 1);
if (d.struct_defined) {
- die(c, "duplicate struct");
+ cdie(c, "duplicate struct");
}
d.struct_defined = 1;
@@ -2404,7 +234,7 @@ defenum(c: *compiler, n: *node) {
d = find(c, name, 0:*byte, 1);
if (d.enum_defined) {
- die(c, "duplicate enum");
+ cdie(c, "duplicate enum");
}
d.enum_defined = 1;
@@ -2416,26 +246,6 @@ defenum(c: *compiler, n: *node) {
}
}
-type_sizeof(c: *compiler, t: *type): int {
- var kind: int;
-
- kind = t.kind;
- if (kind == TY_INT) {
- return 8;
- } else if (kind == TY_BYTE) {
- return 8;
- } else if (kind == TY_PTR) {
- return 8;
- } else if (kind == TY_FUNC) {
- return 8;
- } else if (kind == TY_STRUCT) {
- layout_struct(c, t.st);
- return t.st.struct_size;
- } else {
- die(c, "sizeof: invalid type");
- }
-}
-
layout_struct(c: *compiler, d: *decl) {
var m: *node;
var offset: int;
@@ -2445,7 +255,7 @@ layout_struct(c: *compiler, d: *decl) {
if (d.struct_layout_done) {
if (d.struct_layout_done == 2) {
- die(c, "circular struct definition");
+ cdie(c, "circular struct definition");
}
return;
@@ -2466,7 +276,7 @@ layout_struct(c: *compiler, d: *decl) {
md = find(c, d.name, name, 1);
if (d.member_defined) {
- die(c, "duplicate member");
+ cdie(c, "duplicate member");
}
md.member_defined = 1;
@@ -2506,7 +316,7 @@ compile_func(c: *compiler, d: *decl) {
v = find(c, d.name, name, 1);
if (v.var_defined) {
- die(c, "duplicate argument");
+ cdie(c, "duplicate argument");
}
v.var_defined = 1;
@@ -2522,12 +332,12 @@ compile_func(c: *compiler, d: *decl) {
offset = hoist_locals(c, d, d.func_def.b, 0);
// Compile the function body
- emit_str(c, d.name);
- fixup_label(c, d.func_label);
- emit_preamble(c, offset, !strcmp(d.name, "_start"));
+ emit_str(c.as, d.name);
+ fixup_label(c.as, d.func_label);
+ emit_preamble(c.as, offset, !strcmp(d.name, "_start"));
compile_stmt(c, d, d.func_def.b, 0:*label, 0:*label);
- emit_num(c, 0);
- emit_ret(c);
+ emit_num(c.as, 0);
+ emit_ret(c.as);
}
hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
@@ -2568,7 +378,7 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
v = find(c, d.name, name, 1);
if (v.goto_defined) {
- die(c, "duplicate goto");
+ cdie(c, "duplicate goto");
}
v.goto_defined = 1;
@@ -2583,7 +393,7 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
v = find(c, d.name, name, 1);
if (v.var_defined) {
- die(c, "duplicate variable");
+ cdie(c, "duplicate variable");
}
v.var_type = t;
@@ -2596,52 +406,6 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
return offset;
}
-// Unify two types
-unify(c: *compiler, a: *type, b: *type) {
- var kind: int;
-
- if (a == b) {
- return;
- }
-
- if ((a && !b) || (b && !a) || a.kind != b.kind) {
- die(c, "type error");
- }
-
- kind = a.kind;
- if (kind == TY_PTR) {
- unify(c, a.val, b.val);
- } else if (kind == TY_FUNC) {
- unify(c, a.val, b.val);
- unify(c, a.arg, b.arg);
- } else if (kind == TY_ARG) {
- unify(c, a.val, b.val);
- unify(c, a.arg, b.arg);
- } else if (kind == TY_STRUCT) {
- if (a.st != b.st) {
- die(c, "type error");
- }
- } else if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE) {
- die(c, "unify: invalid type");
- }
-}
-
-count_args(c: *compiler, t: *type): int {
- var nargs: int;
-
- nargs = 0;
- loop {
- if (!t) {
- break;
- }
-
- t = t.arg;
- nargs = nargs + 1;
- }
-
- return nargs;
-}
-
// Translate an expression
compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
var no: *label;
@@ -2656,31 +420,31 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
kind = n.kind;
if (kind == N_STR) {
if (!rhs) {
- die(c, "str is not an lexpr");
+ cdie(c, "str is not an lexpr");
}
- emit_str(c, n.s);
+ emit_str(c.as, n.s);
n.t = mktype1(c, TY_PTR, mktype0(c, TY_BYTE));
} else if (kind == N_NUM) {
if (!rhs) {
- die(c, "num is not an lexpr");
+ cdie(c, "num is not an lexpr");
}
- emit_num(c, n.n);
+ emit_num(c.as, n.n);
n.t = mktype0(c, TY_INT);
} else if (kind == N_CHAR) {
if (!rhs) {
- die(c, "char is not an lexpr");
+ cdie(c, "char is not an lexpr");
}
- emit_num(c, n.n);
+ emit_num(c.as, n.n);
n.t = mktype0(c, TY_INT);
} else if (kind == N_EXPRLIST) {
if (!rhs) {
- die(c, "call is not an lexpr");
+ cdie(c, "call is not an lexpr");
}
if (n.b) {
@@ -2696,7 +460,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
}
} else if (kind == N_CALL) {
if (!rhs) {
- die(c, "call is not an lexpr");
+ cdie(c, "call is not an lexpr");
}
if (n.b) {
@@ -2706,7 +470,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
compile_expr(c, d, n.a, 1);
if (n.a.t.kind != TY_FUNC) {
- die(c, "calling not a function");
+ cdie(c, "calling not a function");
}
if (n.b) {
@@ -2715,7 +479,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
unify(c, n.a.t.arg, 0: *type);
}
- emit_call(c, count_args(c, n.a.t.arg));
+ emit_call(c.as, count_args(c, n.a.t.arg));
n.t = n.a.t.val;
} else if (kind == N_DOT) {
@@ -2723,61 +487,61 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
if (n.a.t.kind == TY_PTR) {
if (n.a.t.val.kind != TY_STRUCT) {
- die(c, "dot not a struct");
+ cdie(c, "dot not a struct");
}
v = find(c, n.a.t.val.st.name, n.b.s, 0);
- emit_load(c, n.a.t);
+ emit_load(c.as, n.a.t);
} else {
if (n.a.t.kind != TY_STRUCT) {
- die(c, "dot not a struct");
+ cdie(c, "dot not a struct");
}
v = find(c, n.a.t.st.name, n.b.s, 0);
}
if (!v || !v.member_defined) {
- die(c, "no such member");
+ cdie(c, "no such member");
}
- emit_num(c, v.member_offset);
- emit_add(c);
+ emit_num(c.as, v.member_offset);
+ emit_add(c.as);
n.t = v.member_type;
if (rhs) {
- emit_load(c, n.t);
+ emit_load(c.as, n.t);
}
} else if (kind == N_IDENT) {
v = find(c, n.s, 0:*byte, 0);
if (v && v.enum_defined) {
- emit_num(c, v.enum_value);
+ emit_num(c.as, v.enum_value);
n.t = mktype0(c, TY_INT);
return;
}
v = find(c, d.name, n.s, 0);
if (v && v.var_defined) {
- emit_lea(c, v.var_offset);
+ emit_lea(c.as, v.var_offset);
n.t = v.var_type;
if (rhs) {
- emit_load(c, n.t);
+ emit_load(c.as, n.t);
}
return;
}
v = find(c, n.s, 0:*byte, 0);
if (v && v.func_defined) {
- emit_ptr(c, v.func_label);
+ emit_ptr(c.as, v.func_label);
n.t = v.func_type;
return;
}
- die(c, "no such variable");
+ cdie(c, "no such variable");
} else if (kind == N_ASSIGN) {
if (!rhs) {
- die(c, "assign is not an lexpr");
+ cdie(c, "assign is not an lexpr");
}
compile_expr(c, d, n.b, 1);
@@ -2787,30 +551,30 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
n.t = n.a.t;
- emit_store(c, n.t);
+ emit_store(c.as, n.t);
} else if (kind == N_SIZEOF) {
if (!rhs) {
- die(c, "sizeof is not an lexpr");
+ cdie(c, "sizeof is not an lexpr");
}
- out = mklabel(c);
+ out = mklabel(c.as);
- emit_jmp(c, out);
+ emit_jmp(c.as, out);
compile_expr(c, d, n.a, 0);
- fixup_label(c, out);
+ fixup_label(c.as, out);
if (n.a.t.kind == TY_BYTE) {
- emit_num(c, 1);
+ emit_num(c.as, 1);
} else {
- emit_num(c, type_sizeof(c, n.a.t));
+ emit_num(c.as, type_sizeof(c, n.a.t));
}
n.t = mktype0(c, TY_INT);
} else if (kind == N_REF) {
if (!rhs) {
- die(c, "ref is not an lexpr");
+ cdie(c, "ref is not an lexpr");
}
compile_expr(c, d, n.a, 0);
@@ -2820,435 +584,435 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) {
compile_expr(c, d, n.a, 1);
if (n.a.t.kind != TY_PTR) {
- die(c, "deref not a pointer");
+ cdie(c, "deref not a pointer");
}
n.t = n.a.t.val;
if (rhs) {
- emit_load(c, n.t);
+ emit_load(c.as, n.t);
}
} else if (kind == N_INDEX) {
compile_expr(c, d, n.a, 1);
compile_expr(c, d, n.b, 1);
if (n.a.t.kind != TY_PTR) {
- die(c, "not a pointer");
+ cdie(c, "not a pointer");
}
- if (!type_isint(c, n.b.t)) {
- die(c, "index: not an int");
+ if (!type_isint(n.b.t)) {
+ cdie(c, "index: not an int");
}
n.t = n.a.t.val;
if (n.t.kind == TY_BYTE) {
- emit_num(c, 1);
+ emit_num(c.as, 1);
} else {
- emit_num(c, type_sizeof(c, n.t));
+ emit_num(c.as, type_sizeof(c, n.t));
}
- emit_mul(c);
- emit_add(c);
+ emit_mul(c.as);
+ emit_add(c.as);
if (rhs) {
- emit_load(c, n.t);
+ emit_load(c.as, n.t);
}
} else if (kind == N_LT) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_lt(c);
+ emit_lt(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "lt: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "lt: not an int");
}
n.t = n.a.t;
} else if (kind == N_GT) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_gt(c);
+ emit_gt(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "gt: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "gt: not an int");
}
n.t = n.a.t;
} else if (kind == N_LE) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_le(c);
+ emit_le(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "le: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "le: not an int");
}
n.t = n.a.t;
} else if (kind == N_GE) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_ge(c);
+ emit_ge(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "ge: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "ge: not an int");
}
n.t = n.a.t;
} else if (kind == N_EQ) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_eq(c);
+ emit_eq(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "eq: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "eq: not an int");
}
n.t = n.a.t;
} else if (kind == N_NE) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_ne(c);
+ emit_ne(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isprim(c, n.a.t)) {
- die(c, "ne: not an int");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "ne: not an int");
}
n.t = n.a.t;
} else if (kind == N_BNOT) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
- no = mklabel(c);
- out = mklabel(c);
+ no = mklabel(c.as);
+ out = mklabel(c.as);
compile_expr(c, d, n.a, 1);
- emit_jz(c, no);
- emit_num(c, 0);
- emit_jmp(c, out);
- fixup_label(c, no);
- emit_num(c, 1);
- fixup_label(c, out);
+ emit_jz(c.as, no);
+ emit_num(c.as, 0);
+ emit_jmp(c.as, out);
+ fixup_label(c.as, no);
+ emit_num(c.as, 1);
+ fixup_label(c.as, out);
- if (!type_isprim(c, n.a.t)) {
- die(c, "not an prim");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "not an prim");
}
n.t = mktype0(c, TY_INT);
} else if (kind == N_BOR) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
- no = mklabel(c);
- out = mklabel(c);
+ no = mklabel(c.as);
+ out = mklabel(c.as);
compile_expr(c, d, n.a, 1);
- emit_jz(c, no);
- emit_num(c, 1);
- emit_jmp(c, out);
+ emit_jz(c.as, no);
+ emit_num(c.as, 1);
+ emit_jmp(c.as, out);
- fixup_label(c, no);
- no = mklabel(c);
+ fixup_label(c.as, no);
+ no = mklabel(c.as);
compile_expr(c, d, n.b, 1);
- emit_jz(c, no);
- emit_num(c, 1);
- emit_jmp(c, out);
+ emit_jz(c.as, no);
+ emit_num(c.as, 1);
+ emit_jmp(c.as, out);
- fixup_label(c, no);
- emit_num(c, 0);
+ fixup_label(c.as, no);
+ emit_num(c.as, 0);
- fixup_label(c, out);
+ fixup_label(c.as, out);
- if (!type_isprim(c, n.a.t)) {
- die(c, "not an prim");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "not an prim");
}
- if (!type_isprim(c, n.b.t)) {
- die(c, "not an prim");
+ if (!type_isprim(n.b.t)) {
+ cdie(c, "not an prim");
}
n.t = mktype0(c, TY_INT);
} else if (kind == N_BAND) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
- no = mklabel(c);
- out = mklabel(c);
+ no = mklabel(c.as);
+ out = mklabel(c.as);
compile_expr(c, d, n.a, 1);
- emit_jz(c, no);
+ emit_jz(c.as, no);
compile_expr(c, d, n.b, 1);
- emit_jz(c, no);
+ emit_jz(c.as, no);
- emit_num(c, 1);
- emit_jmp(c, out);
+ emit_num(c.as, 1);
+ emit_jmp(c.as, out);
- fixup_label(c, no);
- emit_num(c, 0);
+ fixup_label(c.as, no);
+ emit_num(c.as, 0);
- fixup_label(c, out);
+ fixup_label(c.as, out);
- if (!type_isprim(c, n.a.t)) {
- die(c, "not an prim");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "not an prim");
}
- if (!type_isprim(c, n.b.t)) {
- die(c, "not an prim");
+ if (!type_isprim(n.b.t)) {
+ cdie(c, "not an prim");
}
n.t = mktype0(c, TY_INT);
} else if (kind == N_POS) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.a, 1);
- if (!type_isint(c, n.a.t)) {
- die(c, "pos: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "pos: not an int");
}
n.t = n.a.t;
} else if (kind == N_NEG) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.a, 1);
- emit_neg(c);
+ emit_neg(c.as);
- if (!type_isint(c, n.a.t)) {
- die(c, "neg: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "neg: not an int");
}
n.t = n.a.t;
} else if (kind == N_NOT) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.a, 1);
- emit_not(c);
+ emit_not(c.as);
- if (!type_isint(c, n.a.t)) {
- die(c, "not: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "not: not an int");
}
n.t = n.a.t;
} else if (kind == N_ADD) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_add(c);
+ emit_add(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "add: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "add: not an int");
}
n.t = n.a.t;
} else if (kind == N_SUB) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_sub(c);
+ emit_sub(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "sub: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "sub: not an int");
}
n.t = n.a.t;
} else if (kind == N_MUL) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_mul(c);
+ emit_mul(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "mul: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "mul: not an int");
}
n.t = n.a.t;
} else if (kind == N_DIV) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_div(c);
+ emit_div(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "div: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "div: not an int");
}
n.t = n.a.t;
} else if (kind == N_MOD) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_mod(c);
+ emit_mod(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "mod: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "mod: not an int");
}
n.t = n.a.t;
} else if (kind == N_LSH) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_lsh(c);
+ emit_lsh(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "lsh: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "lsh: not an int");
}
n.t = n.a.t;
} else if (kind == N_RSH) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_rsh(c);
+ emit_rsh(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "rsh: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "rsh: not an int");
}
n.t = n.a.t;
} else if (kind == N_AND) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_and(c);
+ emit_and(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "and: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "and: not an int");
}
n.t = n.a.t;
} else if (kind == N_OR) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_or(c);
+ emit_or(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "or: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "or: not an int");
}
n.t = n.a.t;
} else if (kind == N_XOR) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.b, 1);
compile_expr(c, d, n.a, 1);
- emit_xor(c);
+ emit_xor(c.as);
unify(c, n.a.t, n.b.t);
- if (!type_isint(c, n.a.t)) {
- die(c, "xor: not an int");
+ if (!type_isint(n.a.t)) {
+ cdie(c, "xor: not an int");
}
n.t = n.a.t;
} else if (kind == N_CAST) {
if (!rhs) {
- die(c, "not lexpr");
+ cdie(c, "not lexpr");
}
compile_expr(c, d, n.a, 1);
- if (!type_isprim(c, n.a.t)) {
- die(c, "not a primitive");
+ if (!type_isprim(n.a.t)) {
+ cdie(c, "not a primitive");
}
n.t = prototype(c, n.b);
} else {
- die(c, "not an expression");
+ cdie(c, "not an expression");
}
}
@@ -3269,30 +1033,30 @@ compile_stmt(c: *compiler, d: *decl, n: *node, top: *label, out: *label) {
kind = n.kind;
if (kind == N_CONDLIST) {
- ifout = mklabel(c);
+ ifout = mklabel(c.as);
no = 0: *label;
loop {
if (no) {
- fixup_label(c, no);
+ fixup_label(c.as, no);
}
if (!n) {
break;
}
- no = mklabel(c);
+ no = mklabel(c.as);
if (n.a.a) {
compile_expr(c, d, n.a.a, 1);
- emit_jz(c, no);
+ emit_jz(c.as, no);
}
compile_stmt(c, d, n.a.b, top, out);
- emit_jmp(c, ifout);
+ emit_jmp(c.as, ifout);
n = n.b;
}
- fixup_label(c, ifout);
+ fixup_label(c.as, ifout);
} else if (kind == N_STMTLIST) {
loop {
if (!n) {
@@ -3302,48 +1066,48 @@ compile_stmt(c: *compiler, d: *decl, n: *node, top: *label, out: *label) {
n = n.b;
}
} else if (kind == N_LOOP) {
- top = mklabel(c);
- out = mklabel(c);
- fixup_label(c, top);
+ top = mklabel(c.as);
+ out = mklabel(c.as);
+ fixup_label(c.as, top);
compile_stmt(c, d, n.a, top, out);
- emit_jmp(c, top);
- fixup_label(c, out);
+ emit_jmp(c.as, top);
+ fixup_label(c.as, out);
} else if (kind == N_BREAK) {
if (!out) {
- die(c, "break outside loop");
+ cdie(c, "break outside loop");
}
- emit_jmp(c, out);
+ emit_jmp(c.as, out);
} else if (kind == N_CONTINUE) {
if (!top) {
- die(c, "continue outside loop");
+ cdie(c, "continue outside loop");
}
- emit_jmp(c, top);
+ emit_jmp(c.as, top);
} else if (kind == N_RETURN) {
if (n.a) {
if (d.func_type.val.kind == TY_VOID) {
- die(c, "returning a value in a void function");
+ cdie(c, "returning a value in a void function");
}
compile_expr(c, d, n.a, 1);
unify(c, n.a.t, d.func_type.val);
} else {
if (d.func_type.val.kind != TY_VOID) {
- die(c, "returning void in a non void function");
+ cdie(c, "returning void in a non void function");
}
- emit_num(c, 0);
+ emit_num(c.as, 0);
}
- emit_ret(c);
+ emit_ret(c.as);
} else if (kind == N_LABEL) {
v = find(c, d.name, n.a.s, 0);
- fixup_label(c, v.goto_label);
+ fixup_label(c.as, v.goto_label);
} else if (kind == N_GOTO) {
v = find(c, d.name, n.a.s, 0);
if (!v || !v.goto_defined) {
- die(c, "label not defined");
+ cdie(c, "label not defined");
}
- emit_jmp(c, v.goto_label);
+ emit_jmp(c.as, v.goto_label);
} else if (kind != N_VARDECL) {
compile_expr(c, d, n, 1);
- emit_pop(c, 1);
+ emit_pop(c.as, 1);
}
}
@@ -3391,7 +1155,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl {
return 0:*decl;
}
- d = alloc(c, sizeof(*d)): *decl;
+ d = alloc(c.a, sizeof(*d)): *decl;
d.name = name;
d.member_name = member_name;
@@ -3402,7 +1166,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl {
d.func_defined = 0;
d.func_type = 0:*type;
- d.func_label = mklabel(c);
+ d.func_label = mklabel(c.as);
d.func_def = 0:*node;
d.struct_defined = 0;
@@ -3425,7 +1189,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl {
d.var_def = 0:*node;
d.goto_defined = 0;
- d.goto_label = mklabel(c);
+ d.goto_label = mklabel(c.as);
*link = d;
@@ -3480,1035 +1244,16 @@ next_decl(c: *compiler, d: *decl): *decl {
}
}
-mktype(c: *compiler, kind: int, a: *type, b: *type, st: *decl): *type {
- var t: *type;
-
- t = alloc(c, sizeof(*t)):*type;
-
- t.kind = kind;
- t.st = st;
- t.val = a;
- t.arg = b;
-
- return t;
-}
-
-mktype_struct(c: *compiler, st: *decl): *type {
- return mktype(c, TY_STRUCT, 0:*type, 0:*type, st);
-}
-
-mktype0(c: *compiler, kind: int): *type {
- return mktype(c, kind, 0:*type, 0:*type, 0:*decl);
-}
-
-mktype1(c: *compiler, kind: int, a: *type): *type {
- return mktype(c, kind, a, 0:*type, 0:*decl);
-}
-
-mktype2(c: *compiler, kind: int, a: *type, b: *type): *type {
- return mktype(c, kind, a, b, 0:*decl);
-}
-
-prototype(c: *compiler, n: *node): *type {
- var a: *type;
- var b: *type;
- var st: *decl;
- var kind: int;
-
- if (!n) {
- return 0:*type;
- }
-
- c.lineno = n.lineno;
- c.colno = 0;
-
- kind = n.kind;
- if (kind == N_IDENT) {
- if (!strcmp(n.s, "void")) {
- return mktype0(c, TY_VOID);
- }
-
- if (!strcmp(n.s, "int")) {
- return mktype0(c, TY_INT);
- }
-
- if (!strcmp(n.s, "byte")) {
- return mktype0(c, TY_BYTE);
- }
-
- st = find(c, n.s, 0:*byte, 0);
- if (!st || !st.struct_defined) {
- die(c, "unknown struct");
- }
-
- return mktype_struct(c, st);
- } else if (kind == N_ARGLIST) {
- a = prototype(c, n.a.b);
- b = prototype(c, n.b);
-
- kind = a.kind;
- if (kind != TY_INT && kind != TY_BYTE
- && kind != TY_PTR && kind != TY_FUNC) {
- die(c, "not a ptr arg");
- }
-
- return mktype2(c, TY_ARG, a, b);
- } else if (kind == N_FUNCTYPE) {
- if (n.b) {
- a = prototype(c, n.b);
- } else{
- a = mktype0(c, TY_VOID);
- }
-
- b = prototype(c, n.a);
-
- kind = a.kind;
- if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE
- && kind != TY_PTR && kind != TY_FUNC) {
- die(c, "not a ptr return");
- }
-
- return mktype2(c, TY_FUNC, a, b);
- } else if (kind == N_PTRTYPE) {
- return mktype1(c, TY_PTR, prototype(c, n.a));
- } else {
- die(c, "prototype: invalid type");
- }
-}
-
-type_isint(c: *compiler, t: *type): int {
- return t.kind == TY_INT || t.kind == TY_BYTE;
-}
-
-type_isprim(c: *compiler, t: *type): int {
- return t.kind != TY_VOID && t.kind != TY_STRUCT;
-}
-
-// Create a new label
-mklabel(c: *compiler): *label {
- var l: *label;
-
- l = alloc(c, sizeof(*l)):*label;
-
- l.fix = 0:*fixup;
- l.at = 0;
- l.fixed = 0;
-
- return l;
-}
-
-// Reserve size in the output buffer
-reserve(c: *compiler, n: int) {
- var m: *byte;
- var b: *chunk;
-
- if (c.text_end && c.text_end.cap - c.text_end.fill >= n) {
- return;
- }
-
- if (n < 4096) {
- n = 4096;
- }
-
- m = alloc(c, n);
- b = alloc(c, sizeof(*b)):*chunk;
-
- b.buf = m;
- b.fill = 0;
- b.cap = n;
- b.next = 0:*chunk;
-
- if (c.text_end) {
- c.text_end.next = b;
- c.text_end = b;
- } else {
- c.text = b;
- c.text_end = b;
- }
-}
-
-// Add a single byte to the output
-emit(c: *compiler, x: int) {
- reserve(c, 1);
- c.text_end.buf[c.text_end.fill] = x:byte;
- c.text_end.fill = c.text_end.fill + 1;
- c.at = c.at + 1;
-}
-
-// Fix a single reference
-fixup(c: *compiler, here: *byte, delta: int) {
- here[0] = delta: byte;
- here[1] = (delta >> 8): byte;
- here[2] = (delta >> 16): byte;
- here[3] = (delta >> 24): byte;
-}
-
-// Add an new fixup for the current position
-addfixup(c: *compiler, l: *label) {
- var f: *fixup;
- var here: *byte;
-
- reserve(c, 4);
-
- here = &c.text_end.buf[c.text_end.fill];
-
- emit(c, 0);
- emit(c, 0);
- emit(c, 0);
- emit(c, 0);
-
- if (l.fixed) {
- fixup(c, here, l.at - c.at);
- } else {
- f = alloc(c, sizeof(*f)): *fixup;
-
- f.next = l.fix;
- f.ptr = here;
- f.at = c.at;
-
- l.fix = f;
- }
-}
-
-// Fix references to a label to the current position
-fixup_label(c: *compiler, l: *label) {
- var f: *fixup;
-
- if (l.fixed) {
- die(c, "already fixed");
- }
-
- l.at = c.at;
- l.fixed = 1;
-
- f = l.fix;
- loop {
- if (!f) {
- break;
- }
- fixup(c, f.ptr, l.at - f.at);
- f = f.next;
- }
-}
-
-emit_ptr(c: *compiler, l: *label) {
- // lea %rax, [l]
- emit(c, 0x48);
- emit(c, 0x8d);
- emit(c, 0x05);
- addfixup(c, l);
- // push %rax
- emit(c, 0x50);
-}
-
-emit_jmp(c: *compiler, l: *label) {
- // jmp l
- emit(c, 0xe9);
- addfixup(c, l);
-}
-
-emit_num(c: *compiler, x: int) {
- // push x
- emit(c, 0x68);
- emit(c, x);
- emit(c, x >> 8);
- emit(c, x >> 16);
- emit(c, x >> 24);
-}
-
-emit_str(c: *compiler, s: *byte) {
- var a: *label;
- var b: *label;
- var i: int;
-
- a = mklabel(c);
- b = mklabel(c);
-
- // jmp b
- emit_jmp(c, b);
-
- // a:
- fixup_label(c, a);
-
- i = 0;
- // .string s
- loop {
- if (!s[i]) {
- break;
- }
- emit(c, s[i]:int);
- i = i + 1;
- }
- emit(c, 0);
-
- // nop sled
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
- emit(c, 0x90);
-
- // b:
- fixup_label(c, b);
- // push a
- emit_ptr(c, a);
-}
-
-emit_pop(c: *compiler, n: int) {
- n = n * 8;
- // add rsp, 8*n
- emit(c, 0x48);
- emit(c, 0x81);
- emit(c, 0xc4);
- emit(c, n);
- emit(c, n >> 8);
- emit(c, n >> 16);
- emit(c, n >> 24);
-}
-
-emit_preamble(c: *compiler, n: int, start: int) {
- var i: int;
- if (start) {
- // xor rbp, rbp
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xed);
- // mov rdi, [rsp]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x3c);
- emit(c, 0x24);
- // lea rsi, [rsp + 8]
- emit(c, 0x48);
- emit(c, 0x8d);
- emit(c, 0x74);
- emit(c, 0x24);
- emit(c, 0x08);
- // lea rdx, [rsi + rdi * 8 + 8]
- emit(c, 0x48);
- emit(c, 0x8d);
- emit(c, 0x54);
- emit(c, 0xfe);
- emit(c, 0x08);
- // push rdx
- emit(c, 0x52);
- // push rsi
- emit(c, 0x56);
- // push rdi
- emit(c, 0x57);
- // push rbp
- emit(c, 0x55);
- }
- // push rbp
- emit(c, 0x55);
- // mov rbp, rsp
- emit(c, 0x48);
- emit(c, 0x89);
- emit(c, 0xe5);
- i = 0;
- loop {
- if (i >= n) {
- break;
- }
- emit_num(c, 0);
- i = i + 8;
- }
-}
-
-emit_store(c: *compiler, t: *type) {
- // pop rdi
- emit(c, 0x5f);
- // pop rax
- emit(c, 0x58);
- if (t.kind == TY_BYTE) {
- // mov [rdi], al
- emit(c, 0x88);
- emit(c, 0x07);
- } else if (type_isprim(c, t)) {
- // mov [rdi], rax
- emit(c, 0x48);
- emit(c, 0x89);
- emit(c, 0x07);
- } else {
- die(c, "invalid store");
- }
- // push rax
- emit(c, 0x50);
-}
-
-emit_load(c: *compiler, t: *type) {
- // pop rdi
- emit(c, 0x5f);
- if (t.kind == TY_BYTE) {
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // mov al, [rdi]
- emit(c, 0x8a);
- emit(c, 0x07);
- } else if (type_isprim(c, t)) {
- // mov rax, [rdi]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x07);
- } else {
- die(c, "invalid load");
- }
- // push rax
- emit(c, 0x50);
-}
-
-emit_jz(c: *compiler, l: *label) {
- // pop rax
- emit(c, 0x58);
- // test rax, rax
- emit(c, 0x48);
- emit(c, 0x85);
- emit(c, 0xc0);
- // jz no
- emit(c, 0x0f);
- emit(c, 0x84);
- addfixup(c, l);
-}
-
-emit_lea(c: *compiler, offset: int) {
- // lea rax, [rbp + offset]
- emit(c, 0x48);
- emit(c, 0x8d);
- emit(c, 0x85);
- emit(c, offset);
- emit(c, offset >> 8);
- emit(c, offset >> 16);
- emit(c, offset >> 24);
- // push rax
- emit(c, 0x50);
-}
-
-emit_and(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rdx
- emit(c, 0x5a);
- // and rdx, rax
- emit(c, 0x48);
- emit(c, 0x21);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_or(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rdx
- emit(c, 0x5a);
- // or rdx, rax
- emit(c, 0x48);
- emit(c, 0x09);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_xor(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rdx
- emit(c, 0x5a);
- // xor rdx, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_add(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rdx
- emit(c, 0x5a);
- // add rdx, rax
- emit(c, 0x48);
- emit(c, 0x01);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_ret(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // mov rsp, rbp
- emit(c, 0x48);
- emit(c, 0x89);
- emit(c, 0xec);
- // pop rbp
- emit(c, 0x5d);
- // ret
- emit(c, 0xc3);
-}
-
-emit_call(c: *compiler, n: int) {
- // pop rax
- emit(c, 0x58);
- // call rax
- emit(c, 0xff);
- emit(c, 0xd0);
- // add rsp, 8*(n+1)
- emit_pop(c, n);
- // push rax
- emit(c, 0x50);
-}
-
-emit_gt(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // setg al
- emit(c, 0x0f);
- emit(c, 0x9f);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_lt(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // setl al
- emit(c, 0x0f);
- emit(c, 0x9c);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_ge(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // setge al
- emit(c, 0x0f);
- emit(c, 0x9d);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_le(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // setle al
- emit(c, 0x0f);
- emit(c, 0x9e);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_eq(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // sete al
- emit(c, 0x0f);
- emit(c, 0x94);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_ne(c: *compiler) {
- // pop rdx
- emit(c, 0x5a);
- // pop rcx
- emit(c, 0x59);
- // xor rax, rax
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xc0);
- // cmp rdx, rcx
- emit(c, 0x48);
- emit(c, 0x39);
- emit(c, 0xca);
- // setne al
- emit(c, 0x0f);
- emit(c, 0x95);
- emit(c, 0xc0);
- // mov rax
- emit(c, 0x50);
-}
-
-emit_sub(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rdx
- emit(c, 0x5a);
- // add rax, rdx
- emit(c, 0x48);
- emit(c, 0x29);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_mul(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rcx
- emit(c, 0x59);
- // mul rcx
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xe1);
- // push rax
- emit(c, 0x50);
-}
-
-emit_div(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rcx
- emit(c, 0x59);
- // xor rdx, rdx
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xd2);
- // test rax, rax
- emit(c, 0x48);
- emit(c, 0x85);
- emit(c, 0xc0);
- // sets dl
- emit(c, 0x0f);
- emit(c, 0x98);
- emit(c, 0xc2);
- // neg rdx
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xda);
- // idiv rcx
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xf9);
- // push rax
- emit(c, 0x50);
-}
-
-emit_mod(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rcx
- emit(c, 0x59);
- // xor rdx, rdx
- emit(c, 0x48);
- emit(c, 0x31);
- emit(c, 0xd2);
- // test rax, rax
- emit(c, 0x48);
- emit(c, 0x85);
- emit(c, 0xc0);
- // sets dl
- emit(c, 0x0f);
- emit(c, 0x98);
- emit(c, 0xc2);
- // neg rdx
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xda);
- // idiv rcx
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xf9);
- // push rdx
- emit(c, 0x52);
-}
-
-emit_lsh(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rcx
- emit(c, 0x59);
- // shl rax, cl
- emit(c, 0x48);
- emit(c, 0xd3);
- emit(c, 0xe0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_rsh(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // pop rcx
- emit(c, 0x59);
- // shr rax, cl
- emit(c, 0x48);
- emit(c, 0xd3);
- emit(c, 0xe8);
- // push rax
- emit(c, 0x50);
-}
-
-emit_not(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // neg rax
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xd0);
- // push rax
- emit(c, 0x50);
-}
-
-emit_neg(c: *compiler) {
- // pop rax
- emit(c, 0x58);
- // neg rax
- emit(c, 0x48);
- emit(c, 0xf7);
- emit(c, 0xd8);
- // push rax
- emit(c, 0x50);
-}
-
-gen_builtins(c: *compiler) {
- var d: *decl;
-
- d = find(c, "syscall", 0:*byte, 1);
- if (d.func_defined && !d.func_label.fixed) {
- d.func_defined = 1;
- fixup_label(c, d.func_label);
- // push rbp
- emit(c, 0x55);
- // mov rbp, rsp
- emit(c, 0x48);
- emit(c, 0x89);
- emit(c, 0xe5);
- // mov rax, [rbp + 16]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x45);
- emit(c, 0x10);
- // mov rdi, [rbp + 24]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x7d);
- emit(c, 0x18);
- // mov rsi, [rbp + 32]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x75);
- emit(c, 0x20);
- // mov rdx, [rbp + 40]
- emit(c, 0x48);
- emit(c, 0x8b);
- emit(c, 0x55);
- emit(c, 0x28);
- // mov r10, [rbp + 48]
- emit(c, 0x4c);
- emit(c, 0x8b);
- emit(c, 0x55);
- emit(c, 0x30);
- // mov r8, [rbp + 56]
- emit(c, 0x4c);
- emit(c, 0x8b);
- emit(c, 0x45);
- emit(c, 0x38);
- // mov r9, [rbp + 64]
- emit(c, 0x4c);
- emit(c, 0x8b);
- emit(c, 0x4d);
- emit(c, 0x40);
- // syscall
- emit(c, 0x0f);
- emit(c, 0x05);
- // pop rbp
- emit(c, 0x5d);
- // ret
- emit(c, 0xc3);
- }
-}
-
-writeout(c: *compiler) {
- var b: *chunk;
- var i: int;
- var text_size: int;
- var load_addr: int;
- var entry: int;
- var d: *decl;
-
- load_addr = 0x100000;
- text_size = c.at;
-
- d = find(c, "_start", 0:*byte, 0);
- if (!d || !d.func_defined || !d.func_label.fixed) {
- die(c, "_start is not defined");
- }
-
- entry = load_addr + d.func_label.at + 128;
- text_size = text_size + 128;
-
- // magic
- putchar(c, 0x7f);
- putchar(c, 'E');
- putchar(c, 'L');
- putchar(c, 'F');
-
- // class
- putchar(c, 2);
-
- // endian
- putchar(c, 1);
-
- // version
- putchar(c, 1);
-
- // abi
- putchar(c, 0);
-
- // abi version
- putchar(c, 0);
-
- // padding
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // type
- putchar(c, 2);
- putchar(c, 0);
-
- // machine
- putchar(c, 62);
- putchar(c, 0);
-
- // version
- putchar(c, 1);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // entry point
- putchar(c, entry);
- putchar(c, entry >> 8);
- putchar(c, entry >> 16);
- putchar(c, entry >> 24);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phoff
- putchar(c, 64);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // shoff
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // flags
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // ehsize
- putchar(c, 64);
- putchar(c, 0);
-
- // phentsize
- putchar(c, 56);
- putchar(c, 0);
-
- // phnum
- putchar(c, 1);
- putchar(c, 0);
-
- // shentsize
- putchar(c, 64);
- putchar(c, 0);
-
- // shnum
- putchar(c, 0);
- putchar(c, 0);
-
- // shstrndx
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].type
- putchar(c, 1);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].flags
- putchar(c, 5);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].offset
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].vaddr
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0x10);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].paddr
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].filesize
- putchar(c, text_size);
- putchar(c, text_size >> 8);
- putchar(c, text_size >> 16);
- putchar(c, text_size >> 24);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].memsize
- putchar(c, text_size);
- putchar(c, text_size >> 8);
- putchar(c, text_size >> 16);
- putchar(c, text_size >> 24);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // phdr[0].align
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
- putchar(c, 0);
-
- // nop sled
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
- putchar(c, 0x90);
-
- b = c.text;
- loop {
- if (!b) {
- break;
- }
- i = 0;
- loop {
- if (i >= b.fill) {
- break;
- }
- putchar(c, b.buf[i]: int);
- i = i + 1;
- }
- b = b.next;
- }
-}
-
main(argc: int, argv: **byte, envp: **byte) {
- var c: compiler;
+ var a: alloc;
+ var c: *compiler;
var p: *node;
+ var d: *decl;
var i: int;
- comp_setup(&c);
+ setup_alloc(&a);
+
+ c = comp_setup(&a);
i = 1;
loop {
@@ -4519,25 +1264,42 @@ main(argc: int, argv: **byte, envp: **byte) {
if (!strcmp(argv[i], "-o")) {
i = i + 1;
if (i >= argc) {
- die(&c, "invalid -o at end of argument list");
+ die("invalid -o at end of argument list");
}
- open_output(&c, argv[i]);
+ open_output(c.as, argv[i]);
i = i + 1;
continue;
}
- open_source(&c, argv[i]);
- p = parse_program(&c);
- close_source(&c);
- compile(&c, p);
+ if (argv[i][0] == '-':byte) {
+ die("invalid argument");
+ }
+
+ open_source(c, argv[i]);
+ p = parse_program(c, p);
+ close_source(c);
i = i + 1;
}
- if (c.fdout == 1) {
- open_output(&c, "a.out");
+ compile(c, p);
+
+ if (c.as.fdout == 1) {
+ open_output(c.as, "a.out");
+ }
+
+ d = find(c, "syscall", 0:*byte, 1);
+ if (d.func_defined && !d.func_label.fixed) {
+ fixup_label(c.as, d.func_label);
+ emit_preamble(c.as, 0, 0);
+ emit_syscall(c.as);
+ emit_ret(c.as);
+ }
+
+ d = find(c, "_start", 0:*byte, 0);
+ if (!d || !d.func_defined) {
+ die("no _start");
}
- gen_builtins(&c);
- writeout(&c);
+ writeout(c.as, d.func_label);
}
diff --git a/cc3.y b/cc3.y
@@ -0,0 +1,152 @@
+start program;
+
+primary: ident
+ | num
+ | hex
+ | str
+ | chr
+ | lpar expr rpar
+;
+
+expr_list: expr
+ | expr comma expr_list
+;
+
+post_expr: primary
+ | sizeof lpar expr rpar
+ | post_expr lsq expr rsq
+ | post_expr lpar expr_list rpar
+ | post_expr dot ident
+ | post_expr colon type
+;
+
+unary_expr : post_expr
+ | and unary_expr
+ | star unary_expr
+ | plus unary_expr
+ | minus unary_expr
+ | tilde unary_expr
+ | bang unary_expr
+;
+
+shift_expr : unary_expr
+ | unary_expr lsh shift_expr
+ | unary_expr rsh shift_expr
+;
+
+mul_expr : shift_expr
+ | shift_expr star mul_expr
+ | shift_expr div mul_expr
+ | shift_expr mod mul_expr
+ | shift_expr and mul_expr
+;
+
+add_expr : mul_expr
+ | mul_expr plus add_expr
+ | mul_expr minus add_expr
+ | mul_expr or add_expr
+ | mul_expr xor add_expr
+;
+
+comp_expr : add_expr
+ | add_expr lt add_expr
+ | add_expr gt add_expr
+ | add_expr le add_expr
+ | add_expr gt add_expr
+ | add_expr eq add_expr
+ | add_expr ne add_expr
+;
+
+bool_expr : bool_expr
+ | add_expr and_then bool_expr
+ | add_expr or_else bool_expr
+;
+
+expr : bool_expr
+ | bool_expr equal expr
+;
+
+if_stmt : if expr lbra stmt_list rbra
+ | if expr lbra stmt_list rbra else if_stmt
+ | if expr lbra stmt_list rbra else lbra stmt_list rbra
+;
+
+loop_stmt : loop lbra stmt_list rbra;
+
+break_stmt : break;
+
+continue_stmt : continue;
+
+return_stmt : return;
+ | return expr
+;
+
+var_decl : ident colon type;
+
+var_stmt : var var_decl;
+
+label_stmt : colon ident
+
+goto_stmt : goto ident;
+
+stmt : if_stmt
+ | loop_stmt
+ | break_stmt semi
+ | continue_stmt semi
+ | return_stmt semi
+ | var_stmt semi
+ | label_stmt semi
+ | goto_stmt semi
+ | expr semi
+;
+
+stmt_list : stmt
+ | stmt stmt_list
+;
+
+enum_list : ident
+ | enum_list comma enum_list
+;
+
+enum_decl : enum ident lbra enum_list rbra;
+
+type : ident
+ | star type
+ | lpar type rpar
+ | func func_type
+;
+
+member_decl : ident colon type;
+
+member_list : member_decl
+ | member_decl semi member_list
+;
+
+struct_decl : struct ident lbra member_list rbra;
+
+arg_decl : colon type
+ ident colon type
+;
+
+arg_list : arg_decl
+ | arg_decl comma arg_list
+;
+
+func_type : lpar rpar colon type
+ | lpar arg_list rpar colon type
+;
+
+func_decl : ident func_type;
+
+func_def : func_decl lbra stmt_list rbra
+ | func_decl semi
+;
+
+decl : enum_decl
+ | struct_decl
+ | func_def
+;
+
+program : decl
+ | decl program
+;
diff --git a/genlex.c b/genlex.c
@@ -0,0 +1,1115 @@
+getchar(): int {
+ var b: byte;
+ var ret: int;
+ ret = read(0, &b, 1);
+ if (ret < 0) {
+ exit(3);
+ }
+ if (ret == 0) {
+ return -1;
+ }
+ return b: int;
+}
+
+putchar(ch: int): void {
+ var b: byte;
+ var ret: int;
+ b = ch: byte;
+ ret = write(1, &b, 1);
+ if (ret != 1) {
+ exit(3);
+ }
+}
+
+struct compiler {
+ a: alloc;
+ nc: int;
+ lineno: int;
+ colno: int;
+ tt: int;
+ n: *nfa;
+ buf: *byte;
+ tlen: int;
+ tmax: int;
+ tags: *tag;
+ ntags: int;
+ nnfa: int;
+ ndfa: int;
+ d: *dfa;
+}
+
+setup(c: *compiler): void {
+ setup_alloc(&c.a);
+ c.nc = getchar();
+ c.lineno = 1;
+ c.colno = 1;
+ c.tt = 0;
+ c.n = 0: *nfa;
+ c.tmax = 256;
+ c.tlen = 0;
+ c.buf = alloc(&c.a, c.tmax);
+ c.tags = 0: *tag;
+ c.ntags = 0;
+ c.nnfa = 0;
+ c.ndfa = 0;
+ c.d = 0:*dfa;
+ feed(c);
+}
+
+feedc(c: *compiler): void {
+ c.nc = getchar();
+ if (c.nc == '\n') {
+ c.lineno = c.lineno + 1;
+ c.colno = 0;
+ }
+ c.colno = c.colno +1;
+}
+
+enum {
+ T_EOF,
+ T_IDENT,
+ T_LITERAL,
+ T_CHARSET,
+ T_DOT,
+ T_STAR,
+ T_PLUS,
+ T_QMARK,
+ T_LPAR,
+ T_RPAR,
+ T_ALT,
+ T_EQUALS,
+ T_SEMI,
+}
+
+feed(c: *compiler): void {
+ c.n = 0:*nfa;
+ c.tlen = 0;
+
+ loop {
+ if (c.nc == -1) {
+ c.tt = T_EOF;
+ return;
+ } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r' || c.nc == '\n') {
+ feedc(c);
+ } else if (c.nc == '/') {
+ feedc(c);
+ if (c.nc == '/') {
+ loop {
+ if (c.nc == '\n' || c.nc == -1) {
+ break;
+ }
+ feedc(c);
+ }
+ } else {
+ die("slash but a comment");
+ }
+ } else {
+ break;
+ }
+ }
+
+ if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') {
+ feed_ident(c);
+ } else if (c.nc == '"') {
+ feed_literal(c);
+ } else if (c.nc == '[') {
+ feed_charset(c);
+ } else if (c.nc == '.') {
+ c.tt = T_DOT;
+ feedc(c);
+ } else if (c.nc == '*') {
+ c.tt = T_STAR;
+ feedc(c);
+ } else if (c.nc == '+') {
+ c.tt = T_PLUS;
+ feedc(c);
+ } else if (c.nc == '?') {
+ c.tt = T_QMARK;
+ feedc(c);
+ } else if (c.nc == '(') {
+ c.tt = T_LPAR;
+ feedc(c);
+ } else if (c.nc == ')') {
+ c.tt = T_RPAR;
+ feedc(c);
+ } else if (c.nc == '|') {
+ c.tt = T_ALT;
+ feedc(c);
+ } else if (c.nc == '=') {
+ c.tt = T_EQUALS;
+ feedc(c);
+ } else if (c.nc == ';') {
+ c.tt = T_SEMI;
+ feedc(c);
+ } else {
+ die("invalid char");
+ }
+}
+
+feed_ident(c: *compiler): void {
+ c.tt = T_IDENT;
+ loop {
+ if (!((c.nc >= 'a' && c.nc <= 'z') ||
+ (c.nc >= 'A' && c.nc <= 'Z') ||
+ (c.nc >= '0' && c.nc <= '9') ||
+ c.nc == '_')) {
+ break;
+ }
+
+ c.buf[c.tlen] = c.nc:byte;
+ c.tlen = c.tlen + 1;
+ if (c.tlen == c.tmax) {
+ die("ident too long");
+ }
+ c.buf[c.tlen] = 0:byte;
+
+ feedc(c);
+ }
+}
+
+hexdig(c: *compiler): int {
+ if (c.nc >= '0' && c.nc <= '9') {
+ return c.nc - '0';
+ }
+
+ if (c.nc >= 'A' && c.nc <= 'F') {
+ return (c.nc - 'A') + 10;
+ }
+
+ if (c.nc >= 'a' && c.nc <= 'f') {
+ return (c.nc - 'a') + 10;
+ }
+
+ die("invalid hex digit");
+}
+
+feed_escape(c: *compiler): void {
+ var hex: int;
+
+ feedc(c);
+
+ if (c.nc == 't') {
+ c.nc = '\t';
+ } else if (c.nc == 'r') {
+ c.nc = '\r';
+ } else if (c.nc == 'n') {
+ c.nc = '\n';
+ } else if (c.nc == '\\') {
+ c.nc = '\\';
+ } else if (c.nc == '\'') {
+ c.nc = '\'';
+ } else if (c.nc == '\"') {
+ c.nc = '\"';
+ } else if (c.nc == '-') {
+ c.nc = '-';
+ } else if (c.nc == '[') {
+ c.nc = '[';
+ } else if (c.nc == ']') {
+ c.nc = ']';
+ } else if (c.nc == 'x') {
+ feedc(c);
+ hex = hexdig(c) * 16;
+ feedc(c);
+ hex = hex + hexdig(c);
+ c.nc = hex;
+ } else {
+ die("invalid escape");
+ }
+}
+
+feed_literal(c: *compiler): void {
+ var a: *nfa;
+
+ c.tt = T_LITERAL;
+ c.n = nfa_empty(c);
+
+ feedc(c);
+
+ loop {
+ if (c.nc == '"') {
+ feedc(c);
+ return;
+ }
+
+ if (c.nc == -1 || c.nc == 0 || c.nc == '\n') {
+ die("invalid char in literal");
+ }
+
+ if (c.nc == '\\') {
+ feed_escape(c);
+ }
+
+ a = nfa_literal(c, c.nc: byte);
+ c.n = nfa_concat(c, c.n, a);
+
+ feedc(c);
+ }
+}
+
+feed_charset(c: *compiler): void {
+ var left: int;
+ var right: int;
+ var mode: int;
+ var nonempty: int;
+ var i: int;
+ var a: *nfa;
+
+ c.tt = T_CHARSET;
+
+ feedc(c);
+
+ mode = 1;
+ nonempty = 0;
+
+ i = left;
+ loop {
+ if (i == 256) {
+ break;
+ }
+ c.buf[i] = 0: byte;
+ i = i + 1;
+ }
+
+ loop {
+ if (c.nc == ']') {
+ feedc(c);
+ break;
+ }
+
+ if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') {
+ die("invalid char in charset");
+ }
+
+ if (c.nc == '^') {
+ if (!mode) {
+ die("invalid char in charset");
+ }
+
+ feedc(c);
+ if (mode && !nonempty) {
+ i = 0;
+ loop {
+ if (i == 256) {
+ break;
+ }
+ c.buf[i] = 1:byte;
+ i = i + 1;
+ }
+ }
+ mode = 0;
+ continue;
+ }
+
+ if (c.nc == '\\') {
+ feed_escape(c);
+ left = c.nc;
+ feedc(c);
+ } else {
+ left = c.nc;
+ feedc(c);
+ }
+
+ if (c.nc == '-') {
+ feedc(c);
+
+ if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') {
+ die("invalid char in charset");
+ }
+
+ if (c.nc == '\\') {
+ feed_escape(c);
+ right = c.nc + 1;
+ feedc(c);
+ } else {
+ right = c.nc + 1;
+ feedc(c);
+ }
+ } else {
+ right = left + 1;
+ }
+
+ nonempty = 1;
+ i = left;
+ loop {
+ if (i >= right) {
+ break;
+ }
+ c.buf[i] = mode: byte;
+ i = i + 1;
+ }
+ }
+
+ c.n = nfa_empty(c);
+ c.n.left = 256;
+ c.n.right = 256;
+
+ i = 0;
+ loop {
+ loop {
+ if (i == 256) {
+ left = 256;
+ break;
+ }
+
+ if (c.buf[i]) {
+ left = i;
+ break;
+ }
+
+ i = i + 1;
+ }
+
+ loop {
+ if (i == 256) {
+ right = 256;
+ break;
+ }
+
+ if (!c.buf[i]) {
+ right = i;
+ break;
+ }
+
+ i = i + 1;
+ }
+
+ if (left >= right) {
+ break;
+ }
+
+ a = nfa_empty(c);
+ a.left = left;
+ a.right = right;
+
+ c.n = nfa_alt(c, c.n, a);
+ }
+}
+
+parse_ident(c: *compiler): *tag {
+ var t: *tag;
+ if (c.tt != T_IDENT) {
+ return 0: *tag;
+ }
+ t = find_tag(c, c.buf);
+ feed(c);
+ return t;
+}
+
+struct tag {
+ next: *tag;
+ s: *byte;
+ id: int;
+}
+
+intern(c: *compiler): *byte {
+ var s: *byte;
+ var i: int;
+ s = alloc(&c.a, c.tlen + 1);
+ i = 0;
+ loop {
+ if (i == c.tlen) {
+ break;
+ }
+ s[i] = c.buf[i];
+ i = i + 1;
+ }
+ s[c.tlen] = 0:byte;
+ return s;
+}
+
+find_tag(c: *compiler, s: *byte): *tag {
+ var t: *tag;
+ var link: **tag;
+ link = &c.tags;
+ loop {
+ t = *link;
+ if (!t) {
+ break;
+ }
+ if (!strcmp(t.s, s)) {
+ return t;
+ }
+ link = &t.next;
+ }
+ t = alloc(&c.a, sizeof(*t)): *tag;
+ t.next = 0:*tag;
+ t.s = intern(c);
+ t.id = c.ntags;
+ c.ntags = c.ntags + 1;
+ *link = t;
+ return t;
+}
+
+struct nfa {
+ id: int;
+ tag: *tag;
+ left: int;
+ right: int;
+ live: int;
+ a: *nfa;
+ b: *nfa;
+ end: *nfa;
+}
+
+nfa_empty(c: *compiler): *nfa {
+ var n: *nfa;
+ n = alloc(&c.a, sizeof(*n)):*nfa;
+ n.id = c.nnfa;
+ n.left = -1;
+ n.right = -1;
+ n.live = 0;
+ n.a = 0:*nfa;
+ n.b = 0:*nfa;
+ n.end = n;
+ c.nnfa = c.nnfa + 1;
+ return n;
+}
+
+nfa_literal(c: *compiler, a: byte): *nfa {
+ var n: *nfa;
+ n = nfa_empty(c);
+ n.left = a:int;
+ n.right = n.left + 1;
+ return n;
+}
+
+nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
+ var i: *nfa;
+ var o: *nfa;
+ i = nfa_empty(c);
+ o = nfa_empty(c);
+ i.a = a;
+ i.b = b;
+ i.end = o;
+ a.end.a = o;
+ b.end.a = o;
+ return i;
+}
+
+nfa_concat(c: *compiler, a: *nfa, b: *nfa): *nfa {
+ a.end.a = b;
+ a.end = b.end;
+ return a;
+}
+
+nfa_plus(c: *compiler, a: *nfa): *nfa {
+ var o: *nfa;
+ o = nfa_empty(c);
+ o.b = a;
+ a.end.a = o;
+ a.end = o;
+ return a;
+}
+
+nfa_qmark(c: *compiler, a: *nfa): *nfa {
+ var b: *nfa;
+ b = nfa_empty(c);
+ a = nfa_alt(c, a, b);
+ return a;
+}
+
+nfa_star(c: *compiler, a: *nfa): *nfa {
+ a = nfa_plus(c, a);
+ a = nfa_qmark(c, a);
+ return a;
+}
+
+parse_literal(c: *compiler): *nfa {
+ var n: *nfa;
+ if (c.tt != T_LITERAL) {
+ return 0:*nfa;
+ }
+ n = c.n;
+ feed(c);
+ return n;
+}
+
+parse_charset(c: *compiler): *nfa {
+ var n: *nfa;
+ if (c.tt != T_CHARSET) {
+ return 0:*nfa;
+ }
+ n = c.n;
+ feed(c);
+ return n;
+}
+
+parse_dot(c: *compiler): *nfa {
+ var n: *nfa;
+ if (c.tt != T_DOT) {
+ return 0:*nfa;
+ }
+ feed(c);
+ n = nfa_empty(c);
+ n.left = 0;
+ n.right = 256;
+ return n;
+}
+
+// primary := literal
+// | charset
+// | dot
+// | '(' regex ')'
+parse_primary(c: *compiler): *nfa {
+ var n: *nfa;
+
+ n = parse_literal(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_charset(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_dot(c);
+ if (n) {
+ return n;
+ }
+
+ if (c.tt == T_LPAR) {
+ feed(c);
+
+ n = parse_regex(c);
+
+ if (c.tt != T_RPAR) {
+ die("expected )");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ return 0: *nfa;
+}
+
+// post := primary
+// | post '*'
+// | post '+'
+// | post '?'
+parse_post(c: *compiler): *nfa {
+ var n: *nfa;
+
+ n = parse_primary(c);
+ if (!n) {
+ return 0: *nfa;
+ }
+
+ loop {
+ if (c.tt == T_STAR) {
+ n = nfa_star(c, n);
+ feed(c);
+ } else if (c.tt == T_PLUS) {
+ n = nfa_plus(c, n);
+ feed(c);
+ } else if (c.tt == T_QMARK) {
+ n = nfa_qmark(c, n);
+ feed(c);
+ } else {
+ return n;
+ }
+ }
+}
+
+// concat := post
+// | post concat
+parse_concat(c: *compiler): *nfa {
+ var n: *nfa;
+ var b: *nfa;
+
+ n = parse_post(c);
+ if (!n) {
+ return 0:*nfa;
+ }
+
+ loop {
+ b = parse_post(c);
+ if (!b) {
+ return n;
+ }
+
+ n = nfa_concat(c, n, b);
+ }
+}
+
+// regex := concat
+// | '|' regex
+// | concat '|' regex
+parse_regex(c: *compiler): *nfa {
+ var n: *nfa;
+ var b: *nfa;
+
+ n = parse_concat(c);
+ if (!n) {
+ n = nfa_empty(c);
+ }
+
+ loop {
+ if (c.tt != T_ALT) {
+ return n;
+ }
+ feed(c);
+
+ b = parse_concat(c);
+ if (!b) {
+ b = nfa_empty(c);
+ }
+
+ n = nfa_alt(c, n, b);
+ }
+}
+
+// decl := ident '=' regex ';'
+parse_decl(c: *compiler): *nfa {
+ var regex: *nfa;
+ var t: *tag;
+ var n: *nfa;
+
+ t = parse_ident(c);
+ if (!t) {
+ return 0: *nfa;
+ }
+
+ if (c.tt != T_EQUALS) {
+ die("expected =");
+ }
+ feed(c);
+
+ regex = parse_regex(c);
+
+ if (c.tt != T_SEMI) {
+ die("expected ;");
+ }
+ feed(c);
+
+ n = nfa_empty(c);
+ n.tag = t;
+
+ return nfa_concat(c, regex, n);
+}
+
+// progam := decl
+// | decl program
+parse_program(c: *compiler): *nfa {
+ var n: *nfa;
+ var p: *nfa;
+
+ p = 0: *nfa;
+ loop {
+ n = parse_decl(c);
+ if (!n) {
+ if (c.tt != T_EOF) {
+ die("expected eof");
+ }
+ return p;
+ }
+
+ if (p) {
+ p = nfa_alt(c, p, n);
+ } else {
+ p = n;
+ }
+ }
+}
+
+struct dfa {
+ id: int;
+ link: **dfa;
+ key: nlist;
+ l: *dfa;
+ r: *dfa;
+ seen: int;
+}
+
+struct nlist {
+ live: **nfa;
+ fill: int;
+ cap: int;
+ tag: *tag;
+}
+
+alloc_nlist(c: *compiler, l: *nlist, cap: int): void {
+ l.cap = cap;
+ l.live = alloc(&c.a, sizeof(*l.live) * cap):**nfa;
+ l.fill = 0;
+ l.tag = 0:*tag;
+}
+
+activate(l: *nlist, n: *nfa): void {
+ if (n.live) {
+ return;
+ }
+
+ n.live = 1;
+
+ l.live[l.fill] = n;
+ l.fill = l.fill + 1;
+
+ if (n.left == -1) {
+ if ((!l.tag) || (n.tag && n.tag.id < l.tag.id)) {
+ l.tag = n.tag;
+ }
+
+ if (n.a) {
+ activate(l, n.a);
+ }
+
+ if (n.b) {
+ activate(l, n.b);
+ }
+ }
+}
+
+nlist_cmp(a: *nlist, b: *nlist): int {
+ var i: int;
+
+ i = 0;
+ loop {
+ if (i == a.fill && i == b.fill) {
+ break;
+ }
+
+ if (i == a.fill) {
+ return -1;
+ }
+
+ if (i == b.fill) {
+ return 1;
+ }
+
+ if (a.live[i].id > b.live[i].id) {
+ return 1;
+ }
+
+ if (a.live[i].id < b.live[i].id) {
+ return -1;
+ }
+
+ i = i + 1;
+ }
+
+ if (!a.tag && !b.tag) {
+ return 0;
+ }
+
+ if (!a.tag) {
+ return -1;
+ }
+
+ if (!b.tag) {
+ return 1;
+ }
+
+ if (a.tag.id < b.tag.id) {
+ return -1;
+ }
+
+ if (a.tag.id > b.tag.id) {
+ return 1;
+ }
+
+ return 0;
+}
+
+nlist_sort(l: *nlist): void {
+ var i: int;
+ var j: int;
+ var k: int;
+ var tmp: *nfa;
+
+ if (l.fill < 2) {
+ return;
+ }
+
+ i = l.fill - 1;
+ loop {
+ j = i;
+ loop {
+ k = (j << 1) + 1;
+ if (k >= l.fill) {
+ break;
+ }
+ if (k + 1 < l.fill && l.live[k].id < l.live[k + 1].id) {
+ k = k + 1;
+ }
+ if (l.live[j].id >= l.live[k].id) {
+ break;
+ }
+ tmp = l.live[k];
+ l.live[k] = l.live[j];
+ l.live[j] = tmp;
+ j = k;
+ }
+ if (i == 0) {
+ break;
+ }
+ i = i - 1;
+ }
+
+ i = l.fill - 1;
+ loop {
+ if (i == 0) {
+ break;
+ }
+ tmp = l.live[i];
+ l.live[i] = l.live[0];
+ j = 0;
+ loop {
+ k = (j << 1) + 1;
+ if (k >= i) {
+ break;
+ }
+ if (k + 1 < i && l.live[k].id < l.live[k + 1].id) {
+ k = k + 1;
+ }
+ if (tmp.id >= l.live[k].id) {
+ break;
+ }
+ j = k;
+ }
+ l.live[j] = tmp;
+ i = i - 1;
+ }
+}
+
+alloc_link(c: *compiler): **dfa {
+ var link: **dfa;
+ var i: int;
+ link = alloc(&c.a, sizeof(*link) * 256): **dfa;
+ i = 0;
+ loop {
+ if (i == 256) {
+ break;
+ }
+ link[i] = 0:*dfa;
+ i = i + 1;
+ }
+ return link;
+}
+
+nlist_copy(c: *compiler, dest: *nlist, src: *nlist): void {
+ var i: int;
+ alloc_nlist(c, dest, src.fill);
+ dest.fill = src.fill;
+ dest.tag = src.tag;
+ i = 0;
+ loop {
+ if (i >= src.fill) {
+ break;
+ }
+ dest.live[i] = src.live[i];
+ i = i + 1;
+ }
+}
+
+nlist2dfa(c: *compiler, l: *nlist): *dfa {
+ var link: **dfa;
+ var d: *dfa;
+ var n: *nfa;
+ var t: *tag;
+ var dir: int;
+ var i: int;
+ var j: int;
+
+ if (l.fill == 0 && !l.tag) {
+ return 0:*dfa;
+ }
+
+ link = &c.d;
+ loop {
+ d = *link;
+ if (!d) {
+ break;
+ }
+
+ dir = nlist_cmp(l, &d.key);
+
+ if (dir < 0) {
+ link = &d.l;
+ } else if (dir > 0) {
+ link = &d.r;
+ } else {
+ return d;
+ }
+ }
+
+ d = alloc(&c.a, sizeof(*d)): *dfa;
+ d.id = c.ndfa;
+ d.link = alloc_link(c);
+ nlist_copy(c, &d.key, l);
+ d.l = 0: *dfa;
+ d.r = 0: *dfa;
+ d.seen = 0;
+
+ c.ndfa = c.ndfa + 1;
+
+ *link = d;
+
+ i = 0;
+ loop {
+ if (i == 256) {
+ break;
+ }
+
+ deactivate(l);
+ j = 0;
+ loop {
+ if (j >= d.key.fill) {
+ break;
+ }
+
+ n = d.key.live[j];
+ if (n.left <= i && i < n.right) {
+ if (n.a) {
+ activate(l, n.a);
+ }
+ if (n.b) {
+ activate(l, n.b);
+ }
+ }
+
+ j = j + 1;
+ }
+
+ d.link[i] = nlist2dfa(c, l);
+
+ i = i + 1;
+ }
+
+ return d;
+}
+
+deactivate(l: *nlist): void {
+ var i: int;
+ i = 0;
+ loop {
+ if (i >= l.fill) {
+ l.fill = 0;
+ l.tag = 0: *tag;
+ break;
+ }
+ l.live[i].live = 0;
+ i = i + 1;
+ }
+}
+
+powerset(c: *compiler, n: *nfa): *dfa {
+ var live: nlist;
+ alloc_nlist(c, &live, c.nnfa);
+ activate(&live, n);
+ return nlist2dfa(c, &live);
+}
+
+codegen(c: *compiler, a: *dfa): void {
+ var i: int;
+ var b: *dfa;
+ var lo: int;
+ var hi: int;
+
+ if (a.seen) {
+ return;
+ }
+ a.seen = 1;
+
+ fdputs(1, ":_");
+ fdputd(1, a.id);
+ fdputs(1, ";\n");
+
+ if (a.key.tag) {
+ fdputs(1, "\tlexmark(l, T_");
+ fdputs(1, a.key.tag.s);
+ fdputs(1, ");\n");
+ }
+
+ fdputs(1, "\tch = lexfeedc(l);\n");
+
+ i = 0;
+ loop {
+ loop {
+ if (i == 256 || a.link[i]) {
+ lo = i;
+ break;
+ }
+ i = i + 1;
+ }
+
+ if (lo == 256) {
+ break;
+ }
+
+ b = a.link[i];
+
+ loop {
+ if (i == 256 || a.link[i] != b) {
+ hi = i;
+ break;
+ }
+ i = i + 1;
+ }
+
+ if (lo != (hi - 1)) {
+ fdputs(1, "\tif (ch >= ");
+ fdputd(1, lo);
+ fdputs(1, " && ch <= ");
+ fdputd(1, hi - 1);
+ } else {
+ fdputs(1, "\tif (ch == ");
+ fdputd(1, lo);
+ }
+ fdputs(1, ") { ");
+ fdputs(1, "goto _");
+ fdputd(1, b.id);
+ fdputs(1, "; }\n");
+
+ }
+
+ fdputs(1, "\treturn;\n");
+
+ i = 0;
+ loop {
+ if (i == 256) {
+ break;
+ }
+
+ if (a.link[i]) {
+ codegen(c, a.link[i]);
+ }
+
+ i = i + 1;
+ }
+}
+
+gen(c: *compiler, a: *dfa): void {
+ var t: *tag;
+ t = c.tags;
+ fdputs(1, "enum {\n");
+ fdputs(1, "\tT_INVALID,\n");
+ fdputs(1, "\tT_EOF,\n");
+ loop {
+ if (!t) {
+ break;
+ }
+ fdputs(1, "\tT_");
+ fdputs(1, t.s);
+ fdputs(1, ",\n");
+ t = t.next;
+ }
+ fdputs(1, "}\n");
+ fdputs(1, "\n");
+ fdputs(1, "lexstep(l: *lex_state): void {\n");
+ fdputs(1, "\tvar ch: int;\n");
+ fdputs(1, "\tlexmark(l, T_INVALID);\n");
+ codegen(c, a);
+ fdputs(1, "}\n");
+}
+
+main(argc: int, argv: **byte, envp: **byte): void {
+ var c: compiler;
+ var n: *nfa;
+ var a: *dfa;
+ setup(&c);
+ n = parse_program(&c);
+ a = powerset(&c, n);
+ gen(&c, a);
+}
diff --git a/lex.c b/lex.c
@@ -1,1289 +0,0 @@
-syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int;
-
-read(fd: int, buf: *byte, n: int): int {
- return syscall(0, fd, buf: int, n, 0, 0, 0);
-}
-
-write(fd: int, buf: *byte, n: int): int {
- return syscall(1, fd, buf: int, n, 0, 0, 0);
-}
-
-getchar(): int {
- var b: byte;
- var ret: int;
- ret = read(0, &b, 1);
- if (ret < 0) {
- exit(3);
- }
- if (ret == 0) {
- return -1;
- }
- return b: int;
-}
-
-putchar(ch: int): void {
- var b: byte;
- var ret: int;
- b = ch: byte;
- ret = write(1, &b, 1);
- if (ret != 1) {
- exit(3);
- }
-}
-
-exit(n: int): void {
- syscall(60, n, 0, 0, 0, 0, 0);
-}
-
-strcmp(a: *byte, b: *byte): int {
- var i: int;
-
- i = 0;
-
- loop {
- if (a[i] > b[i]) {
- return 1;
- }
-
- if (a[i] < b[i]) {
- return -1;
- }
-
- if (a[i] == 0:byte) {
- return 0;
- }
-
- i = i + 1;
- }
-}
-
-fdputc(fd: int, ch: int): void {
- var b: byte;
- var ret: int;
- b = ch: byte;
- ret = write(fd, &b, 1);
- if (ret != 1) {
- exit(3);
- }
-}
-
-
-fdputd(fd: int, n: int): void {
- var a: int;
-
- if (n < 0) {
- fdputc(fd, '-');
- a = -(n % 10);
- n = n / -10;
- } else {
- a = n % 10;
- n = n / 10;
- }
-
- if (n != 0) {
- fdputd(fd, n);
- }
-
- fdputc(fd, '0' + a);
-}
-
-fdput(fd: int, msg: *byte): void {
- var len: int;
- var ret: int;
- var off: int;
- len = strlen(msg);
- off = 0;
- loop {
- if (off == len) {
- break;
- }
- ret = write(fd, msg, len - off);
- if (ret < 0) {
- exit(3);
- }
- off = off + ret;
- }
-}
-
-die(msg: *byte): void {
- fdput(2, "die: ");
- fdput(2, msg);
- fdput(2, "\n");
- exit(1);
-}
-
-strlen(s: *byte): int {
- var ret: int;
- ret = 0;
- loop {
- if (s[ret] == 0:byte) {
- return ret;
- }
- ret = ret + 1;
- }
-}
-
-mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int {
- return syscall(9, addr, len, prot, flags, fd, off);
-}
-
-struct page {
- ptr: *byte;
- fill: int;
- size: int;
-}
-
-struct allocator {
- page: *page;
-}
-
-setup_alloc(a: *allocator): void {
- a.page = 0:*page;
-}
-
-alloc(a: *allocator, size: int): *byte {
- var page: *page;
- var mret: int;
- var ret: *byte;
- var psize: int;
-
- if (size < 0) {
- die("invalid alloc");
- }
-
- if (size >= 2048) {
- size = size + 4095;
- size = size & ~4095;
- mret = mmap(0, size, 3, 0x22, -1, 0);
- if (mret == -1) {
- die("out of memory");
- }
- ret = mret: *byte;
- return ret;
- }
-
- page = a.page;
- if (page) {
- if (size <= page.size - page.fill) {
- mret = page.ptr:int + page.fill;
- page.fill = page.fill + size;
- ret = mret: *byte;
- return ret;
- }
- }
-
- psize = 64 * 1024;
-
- mret = mmap(0, psize, 3, 0x22, -1, 0);
- if (mret == -1) {
- die("out of memory");
- }
-
- page = mret: *page;
- page.ptr = (&page[1]): *byte;
- ret = page.ptr;
- page.size = psize - sizeof(*page);
- page.fill = size;
-
- a.page = page;
-
- return ret;
-}
-
-_start(): void {
- main();
- exit(0);
-}
-
-struct compiler {
- a: allocator;
- nc: int;
- lineno: int;
- colno: int;
- tt: int;
- n: *nfa;
- buf: *byte;
- tlen: int;
- tmax: int;
- tags: *tag;
- ntags: int;
- nnfa: int;
- ndfa: int;
- d: *dfa;
-}
-
-setup(c: *compiler): void {
- setup_alloc(&c.a);
- c.nc = getchar();
- c.lineno = 1;
- c.colno = 1;
- c.tt = 0;
- c.n = 0: *nfa;
- c.tmax = 256;
- c.tlen = 0;
- c.buf = alloc(&c.a, c.tmax);
- c.tags = 0: *tag;
- c.ntags = 0;
- c.nnfa = 0;
- c.ndfa = 0;
- c.d = 0:*dfa;
- feed(c);
-}
-
-feedc(c: *compiler): void {
- c.nc = getchar();
- if (c.nc == '\n') {
- c.lineno = c.lineno + 1;
- c.colno = 0;
- }
- c.colno = c.colno +1;
-}
-
-enum {
- T_EOF,
- T_IDENT,
- T_LITERAL,
- T_CHARSET,
- T_DOT,
- T_STAR,
- T_PLUS,
- T_QMARK,
- T_LPAR,
- T_RPAR,
- T_ALT,
- T_EQUALS,
- T_SEMI,
-}
-
-feed(c: *compiler): void {
- c.n = 0:*nfa;
- c.tlen = 0;
-
- loop {
- if (c.nc == -1) {
- c.tt = T_EOF;
- return;
- } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r' || c.nc == '\n') {
- feedc(c);
- } else if (c.nc == '/') {
- feedc(c);
- if (c.nc == '/') {
- loop {
- if (c.nc == '\n' || c.nc == -1) {
- break;
- }
- feedc(c);
- }
- } else {
- die("slash but a comment");
- }
- } else {
- break;
- }
- }
-
- if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') {
- feed_ident(c);
- } else if (c.nc == '"') {
- feed_literal(c);
- } else if (c.nc == '[') {
- feed_charset(c);
- } else if (c.nc == '.') {
- c.tt = T_DOT;
- feedc(c);
- } else if (c.nc == '*') {
- c.tt = T_STAR;
- feedc(c);
- } else if (c.nc == '+') {
- c.tt = T_PLUS;
- feedc(c);
- } else if (c.nc == '?') {
- c.tt = T_QMARK;
- feedc(c);
- } else if (c.nc == '(') {
- c.tt = T_LPAR;
- feedc(c);
- } else if (c.nc == ')') {
- c.tt = T_RPAR;
- feedc(c);
- } else if (c.nc == '|') {
- c.tt = T_ALT;
- feedc(c);
- } else if (c.nc == '=') {
- c.tt = T_EQUALS;
- feedc(c);
- } else if (c.nc == ';') {
- c.tt = T_SEMI;
- feedc(c);
- } else {
- die("invalid char");
- }
-}
-
-feed_ident(c: *compiler): void {
- c.tt = T_IDENT;
- loop {
- if (!((c.nc >= 'a' && c.nc <= 'z') ||
- (c.nc >= 'A' && c.nc <= 'Z') ||
- (c.nc >= '0' && c.nc <= '9') ||
- c.nc == '_')) {
- break;
- }
-
- c.buf[c.tlen] = c.nc:byte;
- c.tlen = c.tlen + 1;
- if (c.tlen == c.tmax) {
- die("ident too long");
- }
- c.buf[c.tlen] = 0:byte;
-
- feedc(c);
- }
-}
-
-hexdig(c: *compiler): int {
- if (c.nc >= '0' && c.nc <= '9') {
- return c.nc - '0';
- }
-
- if (c.nc >= 'A' && c.nc <= 'F') {
- return (c.nc - 'A') + 10;
- }
-
- if (c.nc >= 'a' && c.nc <= 'f') {
- return (c.nc - 'a') + 10;
- }
-
- die("invalid hex digit");
-}
-
-feed_escape(c: *compiler): void {
- var hex: int;
-
- feedc(c);
-
- if (c.nc == 't') {
- c.nc = '\t';
- } else if (c.nc == 'r') {
- c.nc = '\r';
- } else if (c.nc == 'n') {
- c.nc = '\n';
- } else if (c.nc == '\\') {
- c.nc = '\\';
- } else if (c.nc == '\'') {
- c.nc = '\'';
- } else if (c.nc == '\"') {
- c.nc = '\"';
- } else if (c.nc == '-') {
- c.nc = '-';
- } else if (c.nc == '[') {
- c.nc = '[';
- } else if (c.nc == ']') {
- c.nc = ']';
- } else if (c.nc == 'x') {
- feedc(c);
- hex = hexdig(c) * 16;
- feedc(c);
- hex = hex + hexdig(c);
- c.nc = hex;
- } else {
- die("invalid escape");
- }
-}
-
-feed_literal(c: *compiler): void {
- var a: *nfa;
-
- c.tt = T_LITERAL;
- c.n = nfa_empty(c);
-
- feedc(c);
-
- loop {
- if (c.nc == '"') {
- feedc(c);
- return;
- }
-
- if (c.nc == -1 || c.nc == 0 || c.nc == '\n') {
- die("invalid char in literal");
- }
-
- if (c.nc == '\\') {
- feed_escape(c);
- }
-
- a = nfa_literal(c, c.nc: byte);
- c.n = nfa_concat(c, c.n, a);
-
- feedc(c);
- }
-}
-
-feed_charset(c: *compiler): void {
- var left: int;
- var right: int;
- var mode: int;
- var nonempty: int;
- var i: int;
- var a: *nfa;
-
- c.tt = T_CHARSET;
-
- feedc(c);
-
- mode = 1;
- nonempty = 0;
-
- i = left;
- loop {
- if (i == 256) {
- break;
- }
- c.buf[i] = 0: byte;
- i = i + 1;
- }
-
- loop {
- if (c.nc == ']') {
- feedc(c);
- break;
- }
-
- if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') {
- die("invalid char in charset");
- }
-
- if (c.nc == '^') {
- if (!mode) {
- die("invalid char in charset");
- }
-
- feedc(c);
- if (mode && !nonempty) {
- i = 0;
- loop {
- if (i == 256) {
- break;
- }
- c.buf[i] = 1:byte;
- i = i + 1;
- }
- }
- mode = 0;
- continue;
- }
-
- if (c.nc == '\\') {
- feed_escape(c);
- left = c.nc;
- feedc(c);
- } else {
- left = c.nc;
- feedc(c);
- }
-
- if (c.nc == '-') {
- feedc(c);
-
- if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') {
- die("invalid char in charset");
- }
-
- if (c.nc == '\\') {
- feed_escape(c);
- right = c.nc + 1;
- feedc(c);
- } else {
- right = c.nc + 1;
- feedc(c);
- }
- } else {
- right = left + 1;
- }
-
- nonempty = 1;
- i = left;
- loop {
- if (i >= right) {
- break;
- }
- c.buf[i] = mode: byte;
- i = i + 1;
- }
- }
-
- c.n = nfa_empty(c);
- c.n.left = 256;
- c.n.right = 256;
-
- i = 0;
- loop {
- loop {
- if (i == 256) {
- left = 256;
- break;
- }
-
- if (c.buf[i]) {
- left = i;
- break;
- }
-
- i = i + 1;
- }
-
- loop {
- if (i == 256) {
- right = 256;
- break;
- }
-
- if (!c.buf[i]) {
- right = i;
- break;
- }
-
- i = i + 1;
- }
-
- if (left >= right) {
- break;
- }
-
- a = nfa_empty(c);
- a.left = left;
- a.right = right;
-
- c.n = nfa_alt(c, c.n, a);
- }
-}
-
-parse_ident(c: *compiler): *tag {
- var t: *tag;
- if (c.tt != T_IDENT) {
- return 0: *tag;
- }
- t = find_tag(c, c.buf);
- feed(c);
- return t;
-}
-
-struct tag {
- next: *tag;
- s: *byte;
- id: int;
-}
-
-intern(c: *compiler): *byte {
- var s: *byte;
- var i: int;
- s = alloc(&c.a, c.tlen + 1);
- i = 0;
- loop {
- if (i == c.tlen) {
- break;
- }
- s[i] = c.buf[i];
- i = i + 1;
- }
- s[c.tlen] = 0:byte;
- return s;
-}
-
-find_tag(c: *compiler, s: *byte): *tag {
- var t: *tag;
- var link: **tag;
- link = &c.tags;
- loop {
- t = *link;
- if (!t) {
- break;
- }
- if (!strcmp(t.s, s)) {
- return t;
- }
- link = &t.next;
- }
- t = alloc(&c.a, sizeof(*t)): *tag;
- t.next = 0:*tag;
- t.s = intern(c);
- t.id = c.ntags;
- c.ntags = c.ntags + 1;
- *link = t;
- return t;
-}
-
-struct nfa {
- id: int;
- tag: *tag;
- left: int;
- right: int;
- live: int;
- a: *nfa;
- b: *nfa;
- end: *nfa;
-}
-
-nfa_empty(c: *compiler): *nfa {
- var n: *nfa;
- n = alloc(&c.a, sizeof(*n)):*nfa;
- n.id = c.nnfa;
- n.left = -1;
- n.right = -1;
- n.live = 0;
- n.a = 0:*nfa;
- n.b = 0:*nfa;
- n.end = n;
- c.nnfa = c.nnfa + 1;
- return n;
-}
-
-nfa_literal(c: *compiler, a: byte): *nfa {
- var n: *nfa;
- n = nfa_empty(c);
- n.left = a:int;
- n.right = n.left + 1;
- return n;
-}
-
-nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
- var i: *nfa;
- var o: *nfa;
- i = nfa_empty(c);
- o = nfa_empty(c);
- i.a = a;
- i.b = b;
- i.end = o;
- a.end.a = o;
- b.end.a = o;
- return i;
-}
-
-nfa_concat(c: *compiler, a: *nfa, b: *nfa): *nfa {
- a.end.a = b;
- a.end = b.end;
- return a;
-}
-
-nfa_plus(c: *compiler, a: *nfa): *nfa {
- var o: *nfa;
- o = nfa_empty(c);
- o.b = a;
- a.end.a = o;
- a.end = o;
- return a;
-}
-
-nfa_qmark(c: *compiler, a: *nfa): *nfa {
- var b: *nfa;
- b = nfa_empty(c);
- a = nfa_alt(c, a, b);
- return a;
-}
-
-nfa_star(c: *compiler, a: *nfa): *nfa {
- a = nfa_plus(c, a);
- a = nfa_qmark(c, a);
- return a;
-}
-
-parse_literal(c: *compiler): *nfa {
- var n: *nfa;
- if (c.tt != T_LITERAL) {
- return 0:*nfa;
- }
- n = c.n;
- feed(c);
- return n;
-}
-
-parse_charset(c: *compiler): *nfa {
- var n: *nfa;
- if (c.tt != T_CHARSET) {
- return 0:*nfa;
- }
- n = c.n;
- feed(c);
- return n;
-}
-
-parse_dot(c: *compiler): *nfa {
- var n: *nfa;
- if (c.tt != T_DOT) {
- return 0:*nfa;
- }
- feed(c);
- n = nfa_empty(c);
- n.left = 0;
- n.right = 256;
- return n;
-}
-
-// primary := literal
-// | charset
-// | dot
-// | '(' regex ')'
-parse_primary(c: *compiler): *nfa {
- var n: *nfa;
-
- n = parse_literal(c);
- if (n) {
- return n;
- }
-
- n = parse_charset(c);
- if (n) {
- return n;
- }
-
- n = parse_dot(c);
- if (n) {
- return n;
- }
-
- if (c.tt == T_LPAR) {
- feed(c);
-
- n = parse_regex(c);
-
- if (c.tt != T_RPAR) {
- die("expected )");
- }
- feed(c);
-
- return n;
- }
-
- return 0: *nfa;
-}
-
-// post := primary
-// | post '*'
-// | post '+'
-// | post '?'
-parse_post(c: *compiler): *nfa {
- var n: *nfa;
-
- n = parse_primary(c);
- if (!n) {
- return 0: *nfa;
- }
-
- loop {
- if (c.tt == T_STAR) {
- n = nfa_star(c, n);
- feed(c);
- } else if (c.tt == T_PLUS) {
- n = nfa_plus(c, n);
- feed(c);
- } else if (c.tt == T_QMARK) {
- n = nfa_qmark(c, n);
- feed(c);
- } else {
- return n;
- }
- }
-}
-
-// concat := post
-// | post concat
-parse_concat(c: *compiler): *nfa {
- var n: *nfa;
- var b: *nfa;
-
- n = parse_post(c);
- if (!n) {
- return 0:*nfa;
- }
-
- loop {
- b = parse_post(c);
- if (!b) {
- return n;
- }
-
- n = nfa_concat(c, n, b);
- }
-}
-
-// regex := concat
-// | '|' regex
-// | concat '|' regex
-parse_regex(c: *compiler): *nfa {
- var n: *nfa;
- var b: *nfa;
-
- n = parse_concat(c);
- if (!n) {
- n = nfa_empty(c);
- }
-
- loop {
- if (c.tt != T_ALT) {
- return n;
- }
- feed(c);
-
- b = parse_concat(c);
- if (!b) {
- b = nfa_empty(c);
- }
-
- n = nfa_alt(c, n, b);
- }
-}
-
-// decl := ident '=' regex ';'
-parse_decl(c: *compiler): *nfa {
- var regex: *nfa;
- var t: *tag;
- var n: *nfa;
-
- t = parse_ident(c);
- if (!t) {
- return 0: *nfa;
- }
-
- if (c.tt != T_EQUALS) {
- die("expected =");
- }
- feed(c);
-
- regex = parse_regex(c);
-
- if (c.tt != T_SEMI) {
- die("expected ;");
- }
- feed(c);
-
- n = nfa_empty(c);
- n.tag = t;
-
- return nfa_concat(c, regex, n);
-}
-
-// progam := decl
-// | decl program
-parse_program(c: *compiler): *nfa {
- var n: *nfa;
- var p: *nfa;
-
- p = 0: *nfa;
- loop {
- n = parse_decl(c);
- if (!n) {
- if (c.tt != T_EOF) {
- die("expected eof");
- }
- return p;
- }
-
- if (p) {
- p = nfa_alt(c, p, n);
- } else {
- p = n;
- }
- }
-}
-
-struct dfa {
- id: int;
- link: **dfa;
- key: nlist;
- l: *dfa;
- r: *dfa;
- seen: int;
-}
-
-struct nlist {
- live: **nfa;
- fill: int;
- cap: int;
- tag: *tag;
-}
-
-alloc_nlist(c: *compiler, l: *nlist, cap: int): void {
- l.cap = cap;
- l.live = alloc(&c.a, sizeof(*l.live) * cap):**nfa;
- l.fill = 0;
- l.tag = 0:*tag;
-}
-
-activate(l: *nlist, n: *nfa): void {
- if (n.live) {
- return;
- }
-
- n.live = 1;
-
- l.live[l.fill] = n;
- l.fill = l.fill + 1;
-
- if (n.left == -1) {
- if ((!l.tag) || (n.tag && n.tag.id < l.tag.id)) {
- l.tag = n.tag;
- }
-
- if (n.a) {
- activate(l, n.a);
- }
-
- if (n.b) {
- activate(l, n.b);
- }
- }
-}
-
-nlist_cmp(a: *nlist, b: *nlist): int {
- var i: int;
-
- i = 0;
- loop {
- if (i == a.fill && i == b.fill) {
- break;
- }
-
- if (i == a.fill) {
- return -1;
- }
-
- if (i == b.fill) {
- return 1;
- }
-
- if (a.live[i].id > b.live[i].id) {
- return 1;
- }
-
- if (a.live[i].id < b.live[i].id) {
- return -1;
- }
-
- i = i + 1;
- }
-
- if (!a.tag && !b.tag) {
- return 0;
- }
-
- if (!a.tag) {
- return -1;
- }
-
- if (!b.tag) {
- return 1;
- }
-
- if (a.tag.id < b.tag.id) {
- return -1;
- }
-
- if (a.tag.id > b.tag.id) {
- return 1;
- }
-
- return 0;
-}
-
-nlist_sort(l: *nlist): void {
- var i: int;
- var j: int;
- var k: int;
- var tmp: *nfa;
-
- if (l.fill < 2) {
- return;
- }
-
- i = l.fill - 1;
- loop {
- j = i;
- loop {
- k = (j << 1) + 1;
- if (k >= l.fill) {
- break;
- }
- if (k + 1 < l.fill && l.live[k].id < l.live[k + 1].id) {
- k = k + 1;
- }
- if (l.live[j].id >= l.live[k].id) {
- break;
- }
- tmp = l.live[k];
- l.live[k] = l.live[j];
- l.live[j] = tmp;
- j = k;
- }
- if (i == 0) {
- break;
- }
- i = i - 1;
- }
-
- i = l.fill - 1;
- loop {
- if (i == 0) {
- break;
- }
- tmp = l.live[i];
- l.live[i] = l.live[0];
- j = 0;
- loop {
- k = (j << 1) + 1;
- if (k >= i) {
- break;
- }
- if (k + 1 < i && l.live[k].id < l.live[k + 1].id) {
- k = k + 1;
- }
- if (tmp.id >= l.live[k].id) {
- break;
- }
- j = k;
- }
- l.live[j] = tmp;
- i = i - 1;
- }
-}
-
-alloc_link(c: *compiler): **dfa {
- var link: **dfa;
- var i: int;
- link = alloc(&c.a, sizeof(*link) * 256): **dfa;
- i = 0;
- loop {
- if (i == 256) {
- break;
- }
- link[i] = 0:*dfa;
- i = i + 1;
- }
- return link;
-}
-
-nlist_copy(c: *compiler, dest: *nlist, src: *nlist): void {
- var i: int;
- alloc_nlist(c, dest, src.fill);
- dest.fill = src.fill;
- dest.tag = src.tag;
- i = 0;
- loop {
- if (i >= src.fill) {
- break;
- }
- dest.live[i] = src.live[i];
- i = i + 1;
- }
-}
-
-nlist2dfa(c: *compiler, l: *nlist): *dfa {
- var link: **dfa;
- var d: *dfa;
- var n: *nfa;
- var t: *tag;
- var dir: int;
- var i: int;
- var j: int;
-
- if (l.fill == 0 && !l.tag) {
- return 0:*dfa;
- }
-
- link = &c.d;
- loop {
- d = *link;
- if (!d) {
- break;
- }
-
- dir = nlist_cmp(l, &d.key);
-
- if (dir < 0) {
- link = &d.l;
- } else if (dir > 0) {
- link = &d.r;
- } else {
- return d;
- }
- }
-
- d = alloc(&c.a, sizeof(*d)): *dfa;
- d.id = c.ndfa;
- d.link = alloc_link(c);
- nlist_copy(c, &d.key, l);
- d.l = 0: *dfa;
- d.r = 0: *dfa;
- d.seen = 0;
-
- c.ndfa = c.ndfa + 1;
-
- *link = d;
-
- i = 0;
- loop {
- if (i == 256) {
- break;
- }
-
- deactivate(l);
- j = 0;
- loop {
- if (j >= d.key.fill) {
- break;
- }
-
- n = d.key.live[j];
- if (n.left <= i && i < n.right) {
- if (n.a) {
- activate(l, n.a);
- }
- if (n.b) {
- activate(l, n.b);
- }
- }
-
- j = j + 1;
- }
-
- d.link[i] = nlist2dfa(c, l);
-
- i = i + 1;
- }
-
- return d;
-}
-
-deactivate(l: *nlist): void {
- var i: int;
- i = 0;
- loop {
- if (i >= l.fill) {
- l.fill = 0;
- l.tag = 0: *tag;
- break;
- }
- l.live[i].live = 0;
- i = i + 1;
- }
-}
-
-powerset(c: *compiler, n: *nfa): *dfa {
- var live: nlist;
- alloc_nlist(c, &live, c.nnfa);
- activate(&live, n);
- return nlist2dfa(c, &live);
-}
-
-codegen(c: *compiler, a: *dfa): void {
- var i: int;
- var b: *dfa;
- var lo: int;
- var hi: int;
-
- if (a.seen) {
- return;
- }
- a.seen = 1;
-
- fdput(1, ":_");
- fdputd(1, a.id);
- fdput(1, ";\n");
-
- if (a.key.tag) {
- fdput(1, "\tlexmark(l, T_");
- fdput(1, a.key.tag.s);
- fdput(1, ");\n");
- }
-
- fdput(1, "\tch = lexfeedc(l);\n");
-
- i = 0;
- loop {
- loop {
- if (i == 256 || a.link[i]) {
- lo = i;
- break;
- }
- i = i + 1;
- }
-
- if (lo == 256) {
- break;
- }
-
- b = a.link[i];
-
- loop {
- if (i == 256 || a.link[i] != b) {
- hi = i;
- break;
- }
- i = i + 1;
- }
-
- if (lo != (hi - 1)) {
- fdput(1, "\tif (ch >= ");
- fdputd(1, lo);
- fdput(1, " && ch <= ");
- fdputd(1, hi - 1);
- } else {
- fdput(1, "\tif (ch == ");
- fdputd(1, lo);
- }
- fdput(1, ") { ");
- fdput(1, "goto _");
- fdputd(1, b.id);
- fdput(1, "; }\n");
-
- }
-
- fdput(1, "\treturn;\n");
-
- i = 0;
- loop {
- if (i == 256) {
- break;
- }
-
- if (a.link[i]) {
- codegen(c, a.link[i]);
- }
-
- i = i + 1;
- }
-}
-
-gen(c: *compiler, a: *dfa): void {
- var t: *tag;
- t = c.tags;
- fdput(1, "enum {\n");
- fdput(1, "\tT_invalid,\n");
- fdput(1, "\tT_eof,\n");
- loop {
- if (!t) {
- break;
- }
- fdput(1, "\tT_");
- fdput(1, t.s);
- fdput(1, ",\n");
- t = t.next;
- }
- fdput(1, "}\n");
- fdput(1, "\n");
- fdput(1, "lexstep(l: *lex_state): void {\n");
- fdput(1, "\tvar ch: int;\n");
- fdput(1, "\tlexmark(l, T_invalid);\n");
- codegen(c, a);
- fdput(1, "}\n");
-}
-
-main(): void {
- var c: compiler;
- var n: *nfa;
- var a: *dfa;
- setup(&c);
- n = parse_program(&c);
- a = powerset(&c, n);
- gen(&c, a);
-}
diff --git a/lex1.c b/lex1.c
@@ -0,0 +1,408 @@
+enum {
+ T_EOF,
+ T_IDENT,
+ T_NUM,
+ T_HEX,
+ T_STR,
+ T_CHAR,
+ T_LPAR,
+ T_RPAR,
+ T_LBRA,
+ T_RBRA,
+ T_COMMA,
+ T_SEMI,
+ T_COLON,
+ T_STAR,
+ T_DOT,
+ T_NOT,
+ T_ASSIGN,
+ T_AMP,
+ T_OR,
+ T_XOR,
+ T_LT,
+ T_GT,
+ T_LE,
+ T_GE,
+ T_EQ,
+ T_NE,
+ T_ADD,
+ T_SUB,
+ T_LSH,
+ T_RSH,
+ T_BANG,
+ T_BOR,
+ T_BAND,
+ T_LSQ,
+ T_RSQ,
+ T_DIV,
+ T_MOD,
+}
+
+open_source(c: *compiler, filename: *byte) {
+ var fd: int;
+
+ c.filename = filename;
+ c.nc = 0;
+ c.lineno = 1;
+ c.colno = 1;
+ c.tlen = 0;
+ c.tt = 0;
+
+ fd = open(filename, 0, 0);
+ if (fd < 0) {
+ cdie(c, "failed to open file");
+ }
+
+ c.fdin = fd;
+ c.nc = getchar(c);
+
+ feed(c);
+}
+
+close_source(c: *compiler) {
+ if (c.fdin != 0) {
+ close(c.fdin);
+ }
+ c.fdin = 0;
+}
+
+getchar(c: *compiler): int {
+ var b: byte;
+ var ret: int;
+ ret = read(c.fdin, &b, 1);
+ if (ret < 0) {
+ exit(3);
+ }
+ if (ret == 0) {
+ return -1;
+ }
+ return b: int;
+}
+
+feedc(c: *compiler) {
+ c.nc = getchar(c);
+ if (c.nc == '\n') {
+ c.lineno = c.lineno + 1;
+ c.colno = 0;
+ }
+ c.colno = c.colno + 1;
+}
+
+tappend(c: *compiler) {
+ c.token[c.tlen] = c.nc:byte;
+ c.tlen = c.tlen + 1;
+ if (c.tlen == c.tmax) {
+ cdie(c, "identifier too long");
+ }
+ c.token[c.tlen] = 0:byte;
+ feedc(c);
+}
+
+feed(c: *compiler) {
+ c.tlen = 0;
+ c.token[0] = 0:byte;
+
+ loop {
+ if (c.nc == -1) {
+ // Reached the end of input
+ c.tt = T_EOF;
+ return;
+ } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r') {
+ // Whitespace
+ feedc(c);
+ } else if (c.nc == '\n') {
+ // Line end
+ feedc(c);
+ } else if (c.nc == '/') {
+ // Comment
+ feedc(c);
+ if (c.nc == '/') {
+ // Read until the end of the comment
+ loop {
+ if (c.nc == '\n' || c.nc == -1) {
+ break;
+ }
+ feedc(c);
+ }
+ } else {
+ c.tt = T_DIV;
+ return;
+ }
+
+ } else {
+ // Start of a real token
+ break;
+ }
+ }
+
+ // Identifier
+ if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') {
+ feed_ident(c);
+ return;
+ }
+
+ // String
+ if (c.nc == '"') {
+ feed_str(c);
+ return;
+ }
+
+ // Character
+ if (c.nc == '\'') {
+ feed_char(c);
+ return;
+ }
+
+ // Number
+ if (c.nc >= '0' && c.nc <= '9') {
+ feed_num(c);
+ return;
+ }
+
+ // Single character tokens
+ if (c.nc == '(') {
+ c.tt = T_LPAR;
+ feedc(c);
+ } else if (c.nc == ')') {
+ c.tt = T_RPAR;
+ feedc(c);
+ } else if (c.nc == '{') {
+ c.tt = T_LBRA;
+ feedc(c);
+ } else if (c.nc == '}') {
+ c.tt = T_RBRA;
+ feedc(c);
+ } else if (c.nc == ',') {
+ c.tt = T_COMMA;
+ feedc(c);
+ } else if (c.nc == ';') {
+ c.tt = T_SEMI;
+ feedc(c);
+ } else if (c.nc == ':') {
+ c.tt = T_COLON;
+ feedc(c);
+ } else if (c.nc == '*') {
+ c.tt = T_STAR;
+ feedc(c);
+ } else if (c.nc == '.') {
+ c.tt = T_DOT;
+ feedc(c);
+ } else if (c.nc == '=') {
+ c.tt = T_ASSIGN;
+ feedc(c);
+ if (c.nc == '=') {
+ c.tt = T_EQ;
+ feedc(c);
+ }
+ } else if (c.nc == '&') {
+ c.tt = T_AMP;
+ feedc(c);
+ if (c.nc == '&') {
+ c.tt = T_BAND;
+ feedc(c);
+ }
+ } else if (c.nc == '~') {
+ c.tt = T_NOT;
+ feedc(c);
+ } else if (c.nc == '|') {
+ c.tt = T_OR;
+ feedc(c);
+ if (c.nc == '|') {
+ c.tt = T_BOR;
+ feedc(c);
+ }
+ } else if (c.nc == '^') {
+ c.tt = T_XOR;
+ feedc(c);
+ } else if (c.nc == '!') {
+ c.tt = T_BANG;
+ feedc(c);
+ if (c.nc == '=') {
+ c.tt = T_NE;
+ feedc(c);
+ }
+ } else if (c.nc == '<') {
+ c.tt = T_LT;
+ feedc(c);
+ if (c.nc == '<') {
+ c.tt = T_LSH;
+ feedc(c);
+ } else if (c.nc == '=') {
+ c.tt = T_LE;
+ feedc(c);
+ }
+ } else if (c.nc == '>') {
+ c.tt = T_GT;
+ feedc(c);
+ if (c.nc == '>') {
+ c.tt = T_RSH;
+ feedc(c);
+ } else if (c.nc == '=') {
+ c.tt = T_GE;
+ feedc(c);
+ }
+ } else if (c.nc == '[') {
+ c.tt = T_LSQ;
+ feedc(c);
+ } else if (c.nc == ']') {
+ c.tt = T_RSQ;
+ feedc(c);
+ } else if (c.nc == '+') {
+ c.tt = T_ADD;
+ feedc(c);
+ } else if (c.nc == '-') {
+ c.tt = T_SUB;
+ feedc(c);
+ } else if (c.nc == '%') {
+ c.tt = T_MOD;
+ feedc(c);
+ } else {
+ cdie(c, "invalid char");
+ }
+}
+
+feed_ident(c: *compiler) {
+ c.tt = T_IDENT;
+ loop {
+ if (!((c.nc >= 'a' && c.nc <= 'z') ||
+ (c.nc >= 'A' && c.nc <= 'Z') ||
+ (c.nc >= '0' && c.nc <= '9') ||
+ c.nc == '_')) {
+ break;
+ }
+ tappend(c);
+ }
+}
+
+hexdig(c: *compiler): int {
+ if (c.nc >= '0' && c.nc <= '9') {
+ return c.nc - '0';
+ }
+
+ if (c.nc >= 'A' && c.nc <= 'F') {
+ return (c.nc - 'F') + 10;
+ }
+
+ if (c.nc >= 'a' && c.nc <= 'f') {
+ return (c.nc - 'a') + 10;
+ }
+
+ cdie(c, "invalid hex digit");
+ return 0;
+}
+
+feed_escape(c: *compiler) {
+ var hex: int;
+
+ // backslash
+ feedc(c);
+
+ if (c.nc == 't') {
+ c.nc = '\t';
+ } else if (c.nc == 'r') {
+ c.nc = '\r';
+ } else if (c.nc == 'n') {
+ c.nc = '\n';
+ } else if (c.nc == 'x') {
+ c.nc = getchar(c);
+ hex = hexdig(c) * 16;
+
+ c.nc = getchar(c);
+ hex = hex + hexdig(c);
+
+ c.nc = hex;
+ } else if (c.nc != '\\' && c.nc != '\'' && c.nc != '"') {
+ cdie(c, "invalid escape");
+ }
+}
+
+feed_str(c: *compiler) {
+ c.tt = T_STR;
+
+ // quote
+ feedc(c);
+
+ loop {
+ if (c.nc == '"') {
+ feedc(c);
+ return;
+ }
+
+ if (c.nc == -1 || c.nc == 0 || c.nc == '\n') {
+ cdie(c, "invalid char in string");
+ }
+
+ if (c.tlen == c.tmax) {
+ cdie(c, "string too long");
+ }
+
+ if (c.nc == '\\') {
+ feed_escape(c);
+ }
+
+ tappend(c);
+ }
+}
+
+feed_char(c: *compiler) {
+ c.tt = T_CHAR;
+
+ // quote
+ feedc(c);
+
+ if (c.nc == 0 || c.nc == -1 || c.nc == '\'' || c.nc == '\n') {
+ cdie(c, "invalid char");
+ }
+
+ if (c.nc == '\\') {
+ feed_escape(c);
+ }
+
+ tappend(c);
+
+ if (c.nc != '\'') {
+ cdie(c, "expected '");
+ }
+
+ feedc(c);
+}
+
+feed_hex(c: *compiler) {
+ c.tt = T_HEX;
+
+ loop {
+ if (!((c.nc >= '0' && c.nc <= '9')
+ || (c.nc >= 'a' && c.nc <= 'f')
+ || (c.nc >= 'A' && c.nc <= 'F'))) {
+ break;
+ }
+
+ tappend(c);
+ }
+
+ if (c.tlen == 0) {
+ cdie(c, "expected hex");
+ }
+}
+
+feed_num(c: *compiler) {
+ c.tt = T_NUM;
+
+ if (c.nc == '0') {
+ tappend(c);
+
+ if (c.nc == 'x') {
+ feedc(c);
+ feed_hex(c);
+ return;
+ }
+ }
+
+ loop {
+ if (!(c.nc >= '0' && c.nc <= '9')) {
+ break;
+ }
+
+ tappend(c);
+ }
+}
diff --git a/lib.c b/lib.c
@@ -0,0 +1,117 @@
+die(msg: *byte) {
+ fdputs(2, msg);
+ fdputs(2, "\n");
+ exit(1);
+}
+
+strlen(s: *byte): int {
+ var ret: int;
+ ret = 0;
+ loop {
+ if (s[ret] == 0:byte) {
+ return ret;
+ }
+ ret = ret + 1;
+ }
+}
+
+strcmp(a: *byte, b: *byte): int {
+ var i: int;
+
+ i = 0;
+
+ loop {
+ if (a[i] > b[i]) {
+ return 1;
+ }
+
+ if (a[i] < b[i]) {
+ return -1;
+ }
+
+ if (a[i] == 0:byte) {
+ return 0;
+ }
+
+ i = i + 1;
+ }
+}
+
+fdputc(fd: int, ch: int) {
+ var b: byte;
+ var ret: int;
+ b = ch: byte;
+ ret = write(fd, &b, 1);
+ if (ret != 1) {
+ exit(3);
+ }
+}
+
+fdputs(fd: int, msg: *byte) {
+ var len: int;
+ var ret: int;
+ var off: int;
+ len = strlen(msg);
+ off = 0;
+ loop {
+ if (off == len) {
+ break;
+ }
+ ret = write(fd, msg, len - off);
+ if (ret < 0) {
+ exit(3);
+ }
+ off = off + ret;
+ }
+}
+
+fdputh(fd: int, n: int) {
+ var c: int;
+ var r: int;
+
+ r = 0;
+ loop {
+ if (n == 0) {
+ break;
+ }
+
+ r = (r << 4) + (n & 15);
+ n = n >> 4;
+ }
+
+ n = r;
+
+ loop {
+ c = n & 15;
+ n = n >> 4;
+
+ if (c < 10) {
+ fdputc(fd, c + '0');
+ } else {
+ fdputc(fd, c + ('a' - 10));
+ }
+
+ if (n == 0) {
+ break;
+ }
+ }
+}
+
+fdputd(fd: int, n: int) {
+ var a: int;
+
+ if (n < 0) {
+ fdputc(fd, '-');
+ a = -(n % 10);
+ n = n / -10;
+ } else {
+ a = n % 10;
+ n = n / 10;
+ }
+
+ if (n != 0) {
+ fdputd(fd, n);
+ }
+
+ fdputc(fd, '0' + a);
+}
diff --git a/parse1.c b/parse1.c
@@ -0,0 +1,1482 @@
+struct node {
+ kind: int;
+ a: *node;
+ b: *node;
+ filename: *byte;
+ lineno: int;
+ colno: int;
+ n: int;
+ s: *byte;
+ t: *type;
+}
+
+enum {
+ N_IDENT,
+ N_NUM,
+ N_CHAR,
+ N_STR,
+ N_STMTLIST,
+ N_EXPRLIST,
+ N_CALL,
+ N_DOT,
+ N_ARGLIST,
+ N_FUNC,
+ N_ARGDECL,
+ N_FUNCDECL,
+ N_PROGRAM,
+ N_FUNCTYPE,
+ N_PTRTYPE,
+ N_STRUCT,
+ N_MEMBERDECL,
+ N_MEMBERLIST,
+ N_CONDLIST,
+ N_COND,
+ N_ENUM,
+ N_ENUMLIST,
+ N_LOOP,
+ N_BREAK,
+ N_CONTINUE,
+ N_RETURN,
+ N_VARDECL,
+ N_LABEL,
+ N_GOTO,
+ N_ASSIGN,
+ N_SIZEOF,
+ N_REF,
+ N_DEREF,
+ N_CAST,
+ N_INDEX,
+ N_LT,
+ N_GT,
+ N_LE,
+ N_GE,
+ N_EQ,
+ N_NE,
+ N_ADD,
+ N_SUB,
+ N_MUL,
+ N_LSH,
+ N_RSH,
+ N_BNOT,
+ N_BOR,
+ N_BAND,
+ N_AND,
+ N_OR,
+ N_XOR,
+ N_NOT,
+ N_POS,
+ N_NEG,
+ N_DIV,
+ N_MOD,
+}
+
+mknode(c: *compiler, kind: int, a: *node, b: *node): *node {
+ var ret: *node;
+ ret = alloc(c.a, sizeof(*ret)):*node;
+ ret.kind = kind;
+ ret.a = a;
+ ret.b = b;
+ ret.filename = c.filename;
+ ret.lineno = c.lineno;
+ ret.colno = c.colno;
+ ret.n = 0;
+ ret.s = 0:*byte;
+ ret.t = 0:*type;
+ return ret;
+}
+
+mknode0(c: *compiler, kind: int): *node {
+ return mknode(c, kind, 0:*node, 0:*node);
+}
+
+mknode1(c: *compiler, kind: int, a: *node): *node {
+ return mknode(c, kind, a, 0:*node);
+}
+
+// Copy the current token
+intern(c: *compiler): *byte {
+ var ret: *byte;
+ var i: int;
+
+ ret = alloc(c.a, c.tlen + 1);
+
+ i = 0;
+ loop {
+ if (i == c.tlen) {
+ ret[i] = 0:byte;
+ return ret;
+ }
+
+ ret[i] = c.token[i];
+
+ i = i + 1;
+ }
+}
+
+// ident := IDENT
+parse_ident(c: *compiler): *node {
+ var n: *node;
+
+ if (c.tt != T_IDENT) {
+ return 0:*node;
+ }
+
+ n = mknode0(c, N_IDENT);
+ n.s = intern(c);
+ feed(c);
+
+ return n;
+}
+
+parse_hex(c: *compiler): *node {
+ var n: *node;
+ var x: int;
+ var d: int;
+ var i: int;
+
+ x = 0;
+ i = 0;
+ loop {
+ if (i == c.tlen) {
+ break;
+ }
+
+ d = c.token[i]:int;
+
+ if (d >= '0' && d <= '9') {
+ d = d - '0';
+ } else if (d >= 'a' && d <= 'f') {
+ d = (d - 'a') + 10;
+ } else {
+ d = (d - 'A') + 10;
+ }
+
+ x = x * 16;
+ x = x + d;
+ i = i + 1;
+
+ if (x > 0x7fffffff) {
+ cdie(c, "overflow");
+ }
+ }
+
+ n = mknode0(c, N_NUM);
+ n.n = x;
+ feed(c);
+
+ return n;
+}
+
+// num := NUM
+parse_num(c: *compiler): *node {
+ var n: *node;
+ var x: int;
+ var d: int;
+ var i: int;
+
+ if (c.tt == T_HEX) {
+ return parse_hex(c);
+ }
+
+ if (c.tt != T_NUM) {
+ return 0:*node;
+ }
+
+ x = 0;
+ i = 0;
+ loop {
+ if (i == c.tlen) {
+ break;
+ }
+
+ d = (c.token[i]:int) - '0';
+
+ x = x * 10;
+
+ x = x + d;
+ i = i + 1;
+
+ if (x > 0x7fffffff) {
+ cdie(c, "overflow");
+ }
+ }
+
+ n = mknode0(c, N_NUM);
+ n.n = x;
+ feed(c);
+
+ return n;
+}
+
+// chr := CHAR
+parse_chr(c: *compiler): *node {
+ var n: *node;
+
+ if (c.tt != T_CHAR) {
+ return 0:*node;
+ }
+
+ n = mknode0(c, N_CHAR);
+ n.n = c.token[0]:int;
+ feed(c);
+
+ return n;
+}
+
+// str := STR
+parse_str(c: *compiler): *node {
+ var n: *node;
+
+ if (c.tt != T_STR) {
+ return 0:*node;
+ }
+
+ n = mknode0(c, N_STR);
+ n.s = intern(c);
+ feed(c);
+
+ return n;
+}
+
+// primary := ident
+// | num
+// | str
+// | chr
+// | '(' expr ')'
+parse_primary(c: *compiler): *node {
+ var n: *node;
+
+ n = parse_ident(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_num(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_str(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_chr(c);
+ if (n) {
+ return n;
+ }
+
+ if (c.tt == T_LPAR) {
+ feed(c);
+
+ n = parse_expr(c);
+ if (!n) {
+ cdie(c, "expected expr");
+ }
+
+ if (c.tt != T_RPAR) {
+ cdie(c, "expected )");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ return 0:*node;
+}
+
+// expr_list := expr
+// | expr ',' expr_list
+parse_expr_list(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+
+ a = parse_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ e = mknode1(c, N_EXPRLIST, a);
+ n = e;
+
+ loop {
+ if (c.tt != T_COMMA) {
+ return n;
+ }
+ feed(c);
+
+ a = parse_expr(c);
+ if (!a) {
+ cdie(c, "expected expr");
+ }
+
+ e.b = mknode1(c, N_EXPRLIST, a);
+ e = e.b;
+ }
+}
+
+// post_expr := primary
+// | 'sizeof' '(' expr ')'
+// | post_expr '[' expr ']'
+// | post_expr '(' expr_list ')'
+// | post_expr '.' ident
+// | post_expr ':' type
+parse_post_expr(c: *compiler): *node {
+ var n: *node;
+ var b: *node;
+
+ if (c.tt == T_IDENT && !strcmp(c.token, "sizeof")) {
+ feed(c);
+
+ if (c.tt != T_LPAR) {
+ cdie(c, "expected (");
+ }
+ feed(c);
+
+ n = parse_expr(c);
+ if (!n) {
+ cdie(c, "expected expr");
+ }
+
+ if (c.tt != T_RPAR) {
+ cdie(c, "expected )");
+ }
+ feed(c);
+
+ return mknode1(c, N_SIZEOF, n);
+ }
+
+ n = parse_primary(c);
+ if (!n) {
+ return 0:*node;
+ }
+
+ loop {
+ if (c.tt == T_LSQ) {
+ feed(c);
+
+ b = parse_expr(c);
+
+ if (c.tt != T_RSQ) {
+ cdie(c, "expected ]");
+ }
+ feed(c);
+
+ n = mknode(c, N_INDEX, n, b);
+ } else if (c.tt == T_LPAR) {
+ feed(c);
+
+ b = parse_expr_list(c);
+
+ if (c.tt != T_RPAR) {
+ cdie(c, "expected )");
+ }
+ feed(c);
+
+ n = mknode(c, N_CALL, n, b);
+ } else if (c.tt == T_DOT) {
+ feed(c);
+
+ b = parse_ident(c);
+ if (!b) {
+ cdie(c, "expected ident");
+ }
+
+ n = mknode(c, N_DOT, n, b);
+ } else if (c.tt == T_COLON) {
+ feed(c);
+
+ b = parse_type(c);
+ if (!b) {
+ cdie(c, "expected type");
+ }
+
+ n = mknode(c, N_CAST, n, b);
+ } else {
+ return n;
+ }
+ }
+}
+
+// unary_expr := post_expr
+// | '&' unary_expr
+// | '*' unary_expr
+// | '+' unary_expr
+// | '-' unary_expr
+// | '~' unary_expr
+// | '!' unary_expr
+parse_unary_expr(c: *compiler): *node {
+ var n: *node;
+
+ if (c.tt == T_AMP) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_REF, n);
+ }
+
+ if (c.tt == T_STAR) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_DEREF, n);
+ }
+
+ if (c.tt == T_ADD) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_POS, n);
+ }
+
+ if (c.tt == T_SUB) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_NEG, n);
+ }
+
+ if (c.tt == T_NOT) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_NOT, n);
+ }
+
+ if (c.tt == T_BANG) {
+ feed(c);
+
+ n = parse_unary_expr(c);
+ if (!n) {
+ cdie(c, "expected unary_expr");
+ }
+
+ return mknode1(c, N_BNOT, n);
+ }
+
+ return parse_post_expr(c);
+}
+
+
+// shift_expr := unary_expr
+// | shift_expr '<<' unary_expr
+// | shift_expr '>>' unary_expr
+parse_shift_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_unary_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ loop {
+ if (c.tt == T_LSH) {
+ feed(c);
+
+ b = parse_unary_expr(c);
+ if (!b) {
+ cdie(c, "expected unary_expr");
+ }
+
+ a = mknode(c, N_LSH, a, b);
+ } else if (c.tt == T_RSH) {
+ feed(c);
+
+ b = parse_unary_expr(c);
+ if (!b) {
+ cdie(c, "expected unary_expr");
+ }
+
+ a = mknode(c, N_RSH, a, b);
+ } else {
+ return a;
+ }
+ }
+}
+
+// mul_expr := shift_expr
+// | mul_expr '*' shift_expr
+// | mul_expr '/' shift_expr
+// | mul_expr '%' shift_expr
+// | mul_expr '&' shift_expr
+parse_mul_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_shift_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ loop {
+ if (c.tt == T_STAR) {
+ feed(c);
+
+ b = parse_shift_expr(c);
+ if (!b) {
+ cdie(c, "expected shift_expr");
+ }
+
+ a = mknode(c, N_MUL, a, b);
+ } else if (c.tt == T_DIV) {
+ feed(c);
+
+ b = parse_shift_expr(c);
+ if (!b) {
+ cdie(c, "expected shift_expr");
+ }
+
+ a = mknode(c, N_DIV, a, b);
+ } else if (c.tt == T_MOD) {
+ feed(c);
+
+ b = parse_shift_expr(c);
+ if (!b) {
+ cdie(c, "expected shift_expr");
+ }
+
+ a = mknode(c, N_MOD, a, b);
+ } else if (c.tt == T_AMP) {
+ feed(c);
+
+ b = parse_shift_expr(c);
+ if (!b) {
+ cdie(c, "expected shift_expr");
+ }
+
+ a = mknode(c, N_AND, a, b);
+ } else {
+ return a;
+ }
+ }
+}
+
+// add_expr := mul_expr
+// | add_expr '+' mul_expr
+// | add_expr '-' mul_expr
+// | add_expr '|' mul_expr
+// | add_expr '^' mul_expr
+parse_add_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_mul_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ loop {
+ if (c.tt == T_ADD) {
+ feed(c);
+
+ b = parse_mul_expr(c);
+ if (!b) {
+ cdie(c, "expected mul_expr");
+ }
+
+ a = mknode(c, N_ADD, a, b);
+ } else if (c.tt == T_SUB) {
+ feed(c);
+
+ b = parse_mul_expr(c);
+ if (!b) {
+ cdie(c, "expected mul_expr");
+ }
+
+ a = mknode(c, N_SUB, a, b);
+ } else if (c.tt == T_OR) {
+ feed(c);
+
+ b = parse_mul_expr(c);
+ if (!b) {
+ cdie(c, "expected mul_expr");
+ }
+
+ a = mknode(c, N_OR, a, b);
+ } else if (c.tt == T_XOR) {
+ feed(c);
+
+ b = parse_mul_expr(c);
+ if (!b) {
+ cdie(c, "expected mul_expr");
+ }
+
+ a = mknode(c, N_XOR, a, b);
+ } else {
+ return a;
+ }
+ }
+}
+
+// comp_expr := add_expr
+// | add_expr '<' add_expr
+// | add_expr '>' add_expr
+// | add_expr '<=' add_expr
+// | add_expr '>=' add_expr
+// | add_expr '==' add_expr
+// | add_expr '!=' add_expr
+parse_comp_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_add_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt == T_LT) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_LT, a, b);
+ }
+
+ if (c.tt == T_GT) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_GT, a, b);
+ }
+
+ if (c.tt == T_LE) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_LE, a, b);
+ }
+
+ if (c.tt == T_GE) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_GE, a, b);
+ }
+
+ if (c.tt == T_EQ) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_EQ, a, b);
+ }
+
+ if (c.tt == T_NE) {
+ feed(c);
+
+ b = parse_add_expr(c);
+ if (!b) {
+ cdie(c, "expected add_expr");
+ }
+
+ return mknode(c, N_NE, a, b);
+ }
+
+ return a;
+}
+
+// bool_expr := bool_expr
+// | bool_expr '&&' comp_expr
+// | bool_expr '||' comp_expr
+parse_bool_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_comp_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt == T_BAND) {
+ feed(c);
+
+ b = parse_bool_expr(c);
+ if (!b) {
+ cdie(c, "expected bool_expr");
+ }
+
+ return mknode(c, N_BAND, a, b);
+ }
+
+ if (c.tt == T_BOR) {
+ feed(c);
+
+ b = parse_bool_expr(c);
+ if (!b) {
+ cdie(c, "expected bool_expr");
+ }
+
+ return mknode(c, N_BOR, a, b);
+ }
+
+ return a;
+}
+
+// expr := bool_expr
+// | bool_expr '=' expr
+parse_expr(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_bool_expr(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt == T_ASSIGN) {
+ feed(c);
+
+ b = parse_expr(c);
+ if (!b) {
+ cdie(c, "expected expr");
+ }
+
+ return mknode(c, N_ASSIGN, a, b);
+ }
+
+ return a;
+}
+
+// if_stmt := 'if' expr '{' stmt_list '}'
+// | 'if' expr '{' stmt_list '}' 'else' if_stmt
+// | 'if' expr '{' stmt_list '}' 'else' '{' stmt_list '}'
+parse_if_stmt(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+ var b: *node;
+
+ if (c.tt != T_IDENT || strcmp(c.token, "if")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ n = mknode0(c, N_CONDLIST);
+ e = n;
+
+ loop {
+ a = parse_expr(c);
+ if !a {
+ cdie(c, "expected expr");
+ }
+
+ if (c.tt != T_LBRA) {
+ cdie(c, "expected {");
+ }
+ feed(c);
+
+ b = parse_stmt_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ e.a = mknode(c, N_COND, a, b);
+
+ if (c.tt != T_IDENT || strcmp(c.token, "else")) {
+ return n;
+ }
+ feed(c);
+
+ if (c.tt == T_LBRA) {
+ feed(c);
+
+ b = parse_stmt_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ e.b = mknode1(c, N_CONDLIST, mknode(c, N_COND, 0:*node, b));
+
+ return n;
+ }
+
+ if (c.tt != T_IDENT || strcmp(c.token, "if")) {
+ cdie(c, "expected if");
+ }
+ feed(c);
+
+ e.b = mknode0(c, N_CONDLIST);
+ e = e.b;
+ }
+}
+
+// loop_stmt := 'loop' '{' stmt_list '}'
+parse_loop_stmt(c: *compiler): *node {
+ var a: *node;
+
+ if (c.tt != T_IDENT || strcmp(c.token, "loop")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ if (c.tt != T_LBRA) {
+ cdie(c, "expected {");
+ }
+ feed(c);
+
+ a = parse_stmt_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ return mknode1(c, N_LOOP, a);
+}
+
+// break_stmt := 'break'
+parse_break_stmt(c: *compiler): *node {
+ if (c.tt != T_IDENT || strcmp(c.token, "break")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ return mknode0(c, N_BREAK);
+}
+
+// continue_stmt := 'continue'
+parse_continue_stmt(c: *compiler): *node {
+ if (c.tt != T_IDENT || strcmp(c.token, "continue")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ return mknode0(c, N_CONTINUE);
+}
+
+// return_stmt := 'return'
+// | 'return' expr
+parse_return_stmt(c: *compiler): *node {
+ var a: *node;
+
+ if (c.tt != T_IDENT || strcmp(c.token, "return")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ a = parse_expr(c);
+
+ return mknode1(c, N_RETURN, a);
+}
+
+// var_decl := ident ':' type
+parse_var_decl(c: *compiler): *node {
+ var n: *node;
+ var a: *node;
+ var b: *node;
+
+ a = parse_ident(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt != T_COLON) {
+ cdie(c, "expected :");
+ }
+ feed(c);
+
+ b = parse_type(c);
+ if (!b) {
+ cdie(c, "expected type");
+ }
+
+ return mknode(c, N_VARDECL, a, b);
+}
+
+// var_stmt := 'var' var_decl
+parse_var_stmt(c: *compiler): *node {
+ var a: *node;
+
+ if (c.tt != T_IDENT || strcmp(c.token, "var")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ a = parse_var_decl(c);
+ if (!a) {
+ cdie(c, "expected var_decl");
+ }
+
+ return a;
+}
+
+// label_stmt := ':' ident
+parse_label_stmt(c: *compiler): *node {
+ var a: *node;
+
+ if (c.tt != T_COLON) {
+ return 0:*node;
+ }
+ feed(c);
+
+ a = parse_ident(c);
+ if (!a) {
+ cdie(c, "expected ident");
+ }
+
+ return mknode1(c, N_LABEL, a);
+}
+
+// goto_stmt := 'goto' ident
+parse_goto_stmt(c: *compiler): *node {
+ var a: *node;
+
+ if (c.tt != T_IDENT || strcmp(c.token, "goto")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ a = parse_ident(c);
+ if (!a) {
+ cdie(c, "expected ident");
+ }
+
+ return mknode1(c, N_GOTO, a);
+}
+
+// stmt := if_stmt
+// | loop_stmt
+// | break_stmt ';'
+// | continue_stmt ';'
+// | return_stmt ';'
+// | var_stmt ';'
+// | label_stmt ';'
+// | goto_stmt ';'
+// | expr ';'
+parse_stmt(c: *compiler): *node {
+ var n: *node;
+
+ n = parse_if_stmt(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_loop_stmt(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_break_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_return_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_continue_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_var_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_label_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_goto_stmt(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ n = parse_expr(c);
+ if (n) {
+ if (c.tt != T_SEMI) {
+ cdie(c, "expected ;");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ return 0:*node;
+}
+
+// stmt_list := stmt
+// | stmt stmt_list
+parse_stmt_list(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+
+ a = parse_stmt(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ e = mknode1(c, N_STMTLIST, a);
+ n = e;
+
+ loop {
+ a = parse_stmt(c);
+ if (!a) {
+ return n;
+ }
+
+ e.b = mknode1(c, N_STMTLIST, a);
+ e = e.b;
+ }
+}
+
+// enum_list := ident
+// | enum_list ',' enum_list
+parse_enum_list(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+
+ a = parse_ident(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ e = mknode1(c, N_ENUMLIST, a);
+ n = e;
+
+ loop {
+ if (c.tt != T_COMMA) {
+ return n;
+ }
+ feed(c);
+
+ a = parse_ident(c);
+ if (!a) {
+ return n;
+ }
+
+ e.b = mknode1(c, N_ENUMLIST, a);
+ e = e.b;
+ }
+}
+
+// enum_decl := 'enum' ident '{' enum_list '}'
+parse_enum_decl(c: *compiler): *node {
+ var b: *node;
+
+ if (c.tt != T_IDENT) {
+ return 0:*node;
+ }
+
+ if (strcmp(c.token, "enum")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ if (c.tt != T_LBRA) {
+ cdie(c, "expected {");
+ }
+ feed(c);
+
+ b = parse_enum_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ return mknode(c, N_ENUM, 0:*node, b);
+}
+
+// type := ident
+// | '*' type
+// | '(' type ')'
+// | 'func' func_type
+parse_type(c: *compiler): *node {
+ var n: *node;
+
+ if (c.tt == T_STAR) {
+ feed(c);
+
+ n = parse_type(c);
+
+ return mknode1(c, N_PTRTYPE, n);
+ }
+
+ if (c.tt == T_LPAR) {
+ feed(c);
+
+ n = parse_type(c);
+
+ if (c.tt != T_RPAR) {
+ cdie(c, "expected )");
+ }
+ feed(c);
+
+ return n;
+ }
+
+ if (c.tt == T_IDENT && !strcmp(c.token, "func")) {
+ feed(c);
+
+ n = parse_func_type(c);
+ if (!n) {
+ cdie(c, "expected func_type");
+ }
+
+ return n;
+ }
+
+ return parse_ident(c);
+}
+
+// member_decl := ident ':' type
+parse_member_decl(c: *compiler): *node {
+ var n: *node;
+ var a: *node;
+ var b: *node;
+
+ a = parse_ident(c);
+ if (!a) {
+ return 0: *node;
+ }
+
+ if (c.tt != T_COLON) {
+ cdie(c, "expected :");
+ }
+ feed(c);
+
+ b = parse_type(c);
+ if (!b) {
+ cdie(c, "expected type");
+ }
+
+ return mknode(c, N_MEMBERDECL, a, b);
+}
+
+// member_list := member_decl
+// | member_decl ';' member_list
+parse_member_list(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+
+ a = parse_member_decl(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ e = mknode1(c, N_MEMBERLIST, a);
+ n = e;
+
+ loop {
+ if (c.tt != T_SEMI) {
+ return n;
+ }
+ feed(c);
+
+ a = parse_member_decl(c);
+ if (!a) {
+ return n;
+ }
+
+ e.b = mknode1(c, N_MEMBERLIST, a);
+ e = e.b;
+ }
+}
+
+// struct_decl := 'struct' ident '{' member_list '}'
+parse_struct_decl(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ if (c.tt != T_IDENT) {
+ return 0:*node;
+ }
+
+ if (strcmp(c.token, "struct")) {
+ return 0:*node;
+ }
+ feed(c);
+
+ a = parse_ident(c);
+ if (!a) {
+ cdie(c, "expected ident");
+ }
+
+ if (c.tt != T_LBRA) {
+ cdie(c, "expected {");
+ }
+ feed(c);
+
+ b = parse_member_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ return mknode(c, N_STRUCT, a, b);
+}
+
+// arg_decl := ':' type
+// ident ':' type
+parse_arg_decl(c: *compiler): *node {
+ var n: *node;
+ var a: *node;
+ var b: *node;
+
+ a = parse_ident(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt != T_COLON) {
+ cdie(c, "expected :");
+ }
+ feed(c);
+
+ b = parse_type(c);
+ if (!b) {
+ cdie(c, "expected type");
+ }
+
+ return mknode(c, N_ARGDECL, a, b);
+}
+
+// arg_list := arg_decl
+// | arg_decl ',' arg_list
+parse_arg_list(c: *compiler): *node {
+ var n: *node;
+ var e: *node;
+ var a: *node;
+
+ a = parse_arg_decl(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ e = mknode1(c, N_ARGLIST, a);
+ n = e;
+
+ loop {
+ if (c.tt != T_COMMA) {
+ return n;
+ }
+ feed(c);
+
+ a = parse_arg_decl(c);
+ if (!a) {
+ cdie(c, "expected identifier");
+ }
+
+ e.b = mknode1(c, N_ARGLIST, a);
+ e = e.b;
+ }
+}
+
+// func_type := '(' ')' ':' type
+// | '(' arg_list ')' ':' type
+// | '(' ')'
+// | '(' arg_list ')'
+parse_func_type(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ if (c.tt != T_LPAR) {
+ return 0: *node;
+ }
+ feed(c);
+
+ a = parse_arg_list(c);
+
+ if (c.tt != T_RPAR) {
+ cdie(c, "expected )");
+ }
+ feed(c);
+
+ if (c.tt != T_COLON) {
+ return mknode1(c, N_FUNCTYPE, a);
+ }
+ feed(c);
+
+ b = parse_type(c);
+ if (!b) {
+ cdie(c, "expected type");
+ }
+
+ return mknode(c, N_FUNCTYPE, a, b);
+}
+
+// func_decl := ident func_type
+parse_func_decl(c: *compiler): *node {
+ var a: *node;
+ var b: *node;
+
+ a = parse_ident(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ b = parse_func_type(c);
+ if (!b) {
+ cdie(c, "expected func_type");
+ }
+
+ return mknode(c, N_FUNCDECL, a, b);
+}
+
+// func := func_decl '{' stmt_list '}'
+// | func_decl ';'
+parse_func(c: *compiler): *node {
+ var n: *node;
+ var a: *node;
+ var b: *node;
+
+ a = parse_func_decl(c);
+ if (!a) {
+ return 0:*node;
+ }
+
+ if (c.tt == T_SEMI) {
+ feed(c);
+ return a;
+ }
+
+ if (c.tt != T_LBRA) {
+ cdie(c, "expected {");
+ }
+ feed(c);
+
+ b = parse_stmt_list(c);
+
+ if (c.tt != T_RBRA) {
+ cdie(c, "expected }");
+ }
+ feed(c);
+
+ return mknode(c, N_FUNC, a, b);
+}
+
+// decl := enum_decl
+// | struct_decl
+// | func
+parse_decl(c: *compiler): *node {
+ var n: *node;
+
+ n = parse_enum_decl(c);
+ if (n) {
+ return n;
+ }
+
+ n = parse_struct_decl(c);
+ if (n) {
+ return n;
+ }
+
+ return parse_func(c);
+}
+
+// program := decl
+// | decl program
+parse_program(c: *compiler, p: *node): *node {
+ var n: *node;
+ var e: *node;
+ var d: *node;
+
+ d = parse_decl(c);
+ if (!d) {
+ if (c.tt != T_EOF) {
+ cdie(c, "expected eof");
+ }
+ return p;
+ }
+
+ e = mknode1(c, N_PROGRAM, d);
+ n = e;
+
+ loop {
+ d = parse_decl(c);
+ if (!d) {
+ if (c.tt != T_EOF) {
+ cdie(c, "expected eof");
+ }
+
+ e.b = p;
+ return n;
+ }
+
+ e.b = mknode1(c, N_PROGRAM, d);
+ e = e.b;
+ }
+}
diff --git a/syscall.c b/syscall.c
@@ -0,0 +1,34 @@
+syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int;
+
+_start(argc: int, argv: **byte, envp: **byte) {
+ main(argc, argv, envp);
+ exit(0);
+}
+
+read(fd: int, buf: *byte, n: int): int {
+ return syscall(0, fd, buf: int, n, 0, 0, 0);
+}
+
+write(fd: int, buf: *byte, n: int): int {
+ return syscall(1, fd, buf: int, n, 0, 0, 0);
+}
+
+open(name: *byte, flags: int, mode: int): int {
+ return syscall(2, name: int, flags, mode, 0, 0, 0);
+}
+
+close(fd: int): int {
+ return syscall(3, fd, 0, 0, 0, 0, 0);
+}
+
+mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int {
+ return syscall(9, addr, len, prot, flags, fd, off);
+}
+
+exit(n: int) {
+ syscall(60, n, 0, 0, 0, 0, 0);
+}
+
+unlink(name: *byte): int {
+ return syscall(87, name: int, 0, 0, 0, 0, 0);
+}
diff --git a/type.c b/type.c
@@ -0,0 +1,186 @@
+struct type {
+ kind: int;
+ st: *decl;
+ val: *type;
+ arg: *type;
+}
+
+enum {
+ TY_VOID,
+ TY_INT,
+ TY_BYTE,
+ TY_PTR,
+ TY_ARG,
+ TY_FUNC,
+ TY_STRUCT,
+}
+
+type_sizeof(c: *compiler, t: *type): int {
+ var kind: int;
+
+ kind = t.kind;
+ if (kind == TY_INT) {
+ return 8;
+ } else if (kind == TY_BYTE) {
+ return 8;
+ } else if (kind == TY_PTR) {
+ return 8;
+ } else if (kind == TY_FUNC) {
+ return 8;
+ } else if (kind == TY_STRUCT) {
+ layout_struct(c, t.st);
+ return t.st.struct_size;
+ } else {
+ cdie(c, "sizeof: invalid type");
+ }
+}
+
+// Unify two types
+unify(c: *compiler, a: *type, b: *type) {
+ var kind: int;
+
+ if (a == b) {
+ return;
+ }
+
+ if ((a && !b) || (b && !a) || a.kind != b.kind) {
+ cdie(c, "type error");
+ }
+
+ kind = a.kind;
+ if (kind == TY_PTR) {
+ unify(c, a.val, b.val);
+ } else if (kind == TY_FUNC) {
+ unify(c, a.val, b.val);
+ unify(c, a.arg, b.arg);
+ } else if (kind == TY_ARG) {
+ unify(c, a.val, b.val);
+ unify(c, a.arg, b.arg);
+ } else if (kind == TY_STRUCT) {
+ if (a.st != b.st) {
+ cdie(c, "type error");
+ }
+ } else if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE) {
+ cdie(c, "unify: invalid type");
+ }
+}
+
+count_args(c: *compiler, t: *type): int {
+ var nargs: int;
+
+ nargs = 0;
+ loop {
+ if (!t) {
+ break;
+ }
+
+ t = t.arg;
+ nargs = nargs + 1;
+ }
+
+ return nargs;
+}
+
+mktype(c: *compiler, kind: int, a: *type, b: *type, st: *decl): *type {
+ var t: *type;
+
+ t = alloc(c.a, sizeof(*t)):*type;
+
+ t.kind = kind;
+ t.st = st;
+ t.val = a;
+ t.arg = b;
+
+ return t;
+}
+
+mktype_struct(c: *compiler, st: *decl): *type {
+ return mktype(c, TY_STRUCT, 0:*type, 0:*type, st);
+}
+
+mktype0(c: *compiler, kind: int): *type {
+ return mktype(c, kind, 0:*type, 0:*type, 0:*decl);
+}
+
+mktype1(c: *compiler, kind: int, a: *type): *type {
+ return mktype(c, kind, a, 0:*type, 0:*decl);
+}
+
+mktype2(c: *compiler, kind: int, a: *type, b: *type): *type {
+ return mktype(c, kind, a, b, 0:*decl);
+}
+
+prototype(c: *compiler, n: *node): *type {
+ var a: *type;
+ var b: *type;
+ var st: *decl;
+ var kind: int;
+
+ if (!n) {
+ return 0:*type;
+ }
+
+ c.lineno = n.lineno;
+ c.colno = 0;
+
+ kind = n.kind;
+ if (kind == N_IDENT) {
+ if (!strcmp(n.s, "void")) {
+ return mktype0(c, TY_VOID);
+ }
+
+ if (!strcmp(n.s, "int")) {
+ return mktype0(c, TY_INT);
+ }
+
+ if (!strcmp(n.s, "byte")) {
+ return mktype0(c, TY_BYTE);
+ }
+
+ st = find(c, n.s, 0:*byte, 0);
+ if (!st || !st.struct_defined) {
+ cdie(c, "unknown struct");
+ }
+
+ return mktype_struct(c, st);
+ } else if (kind == N_ARGLIST) {
+ a = prototype(c, n.a.b);
+ b = prototype(c, n.b);
+
+ kind = a.kind;
+ if (kind != TY_INT && kind != TY_BYTE
+ && kind != TY_PTR && kind != TY_FUNC) {
+ cdie(c, "not a ptr arg");
+ }
+
+ return mktype2(c, TY_ARG, a, b);
+ } else if (kind == N_FUNCTYPE) {
+ if (n.b) {
+ a = prototype(c, n.b);
+ } else{
+ a = mktype0(c, TY_VOID);
+ }
+
+ b = prototype(c, n.a);
+
+ kind = a.kind;
+ if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE
+ && kind != TY_PTR && kind != TY_FUNC) {
+ cdie(c, "not a ptr return");
+ }
+
+ return mktype2(c, TY_FUNC, a, b);
+ } else if (kind == N_PTRTYPE) {
+ return mktype1(c, TY_PTR, prototype(c, n.a));
+ } else {
+ cdie(c, "prototype: invalid type");
+ }
+}
+
+type_isint(t: *type): int {
+ return t.kind == TY_INT || t.kind == TY_BYTE;
+}
+
+type_isprim(t: *type): int {
+ return t.kind != TY_VOID && t.kind != TY_STRUCT;
+}