os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit c57ba5281c2a026ea671a56411ef8c0956f5836d
parent 1a41782c6970d2f794df42b7805dccd7ed8a6bbd
Author: erai <erai@omiltem.net>
Date:   Sat, 16 Mar 2024 11:39:46 -0400

Refactor into separate parse, lex, compile stages

Diffstat:
M.gitignore | 3++-
Aalloc.c | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aas.c | 960+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dbuildlex.sh | 10----------
Mcc0.c | 28+++++++++++++++++++---------
Mcc1.c | 3774++++++-------------------------------------------------------------------------
Acc3.y | 152+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agenlex.c | 1115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dlex.c | 1289-------------------------------------------------------------------------------
Alex1.c | 408+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparse1.c | 1482+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asyscall.c | 34++++++++++++++++++++++++++++++++++
Atype.c | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
14 files changed, 4805 insertions(+), 4815 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,4 +1,5 @@ cc0 cc1 cc2 -lex +genlex +gencc diff --git a/alloc.c b/alloc.c @@ -0,0 +1,62 @@ +struct page { + ptr: *byte; + fill: int; + size: int; +} + +struct alloc { + page: *page; +} + +setup_alloc(c: *alloc) { + c.page = 0: *page; +} + +alloc(c: *alloc, size: int): *byte { + var page: *page; + var mret: int; + var ret: *byte; + var psize: int; + + if (size < 0) { + die("invalid alloc"); + } + + if (size >= 2048) { + size = size + 4095; + size = size & ~4095; + mret = mmap(0, size, 3, 0x22, -1, 0); + if (mret == -1) { + die("out of memory"); + } + ret = mret: *byte; + return ret; + } + + page = c.page; + if (page) { + if (size <= page.size - page.fill) { + mret = page.ptr:int + page.fill; + page.fill = page.fill + size; + ret = mret: *byte; + return ret; + } + } + + psize = 64 * 1024; + + mret = mmap(0, psize, 3, 0x22, -1, 0); + if (mret == -1) { + die("out of memory"); + } + + page = mret: *page; + page.ptr = (&page[1]): *byte; + ret = page.ptr; + page.size = psize - sizeof(*page); + page.fill = size; + + c.page = page; + + return ret; +} diff --git a/as.c b/as.c @@ -0,0 +1,960 @@ +struct fixup { + next: *fixup; + ptr: *byte; + at: int; +} + +struct label { + fix: *fixup; + at: int; + fixed: int; +} + +struct chunk { + next: *chunk; + buf: *byte; + fill: int; + cap: int; +} + +struct assembler { + a: *alloc; + fdout: int; + at: int; + text: *chunk; + text_end: *chunk; +} + +setup_assembler(a: *alloc): *assembler { + var c: *assembler; + c = alloc(a, sizeof(*c)): *assembler; + c.a = a; + c.fdout = 1; + c.at = 0; + c.text = 0:*chunk; + c.text_end = 0:*chunk; + return c; +} + +putchar(c: *assembler, ch: int) { + fdputc(c.fdout, ch); +} + +open_output(c: *assembler, filename: *byte) { + var fd: int; + + if (c.fdout != 1) { + die("multiple output files"); + } + + unlink(filename); + + fd = open(filename, 64 + 1, (7 << 6) + (7 << 3) +7); + if (fd < 0) { + die("failed to open output"); + } + + c.fdout = fd; +} + +// Create a new label +mklabel(c: *assembler): *label { + var l: *label; + + l = alloc(c.a, sizeof(*l)):*label; + + l.fix = 0:*fixup; + l.at = 0; + l.fixed = 0; + + return l; +} + +// Reserve size in the output buffer +reserve(c: *assembler, n: int) { + var m: *byte; + var b: *chunk; + + if (c.text_end && c.text_end.cap - c.text_end.fill >= n) { + return; + } + + if (n < 4096) { + n = 4096; + } + + m = alloc(c.a, n); + b = alloc(c.a, sizeof(*b)):*chunk; + + b.buf = m; + b.fill = 0; + b.cap = n; + b.next = 0:*chunk; + + if (c.text_end) { + c.text_end.next = b; + c.text_end = b; + } else { + c.text = b; + c.text_end = b; + } +} + +// Add a single byte to the output +emit(c: *assembler, x: int) { + reserve(c, 1); + c.text_end.buf[c.text_end.fill] = x:byte; + c.text_end.fill = c.text_end.fill + 1; + c.at = c.at + 1; +} + +// Fix a single reference +fixup(c: *assembler, here: *byte, delta: int) { + here[0] = delta: byte; + here[1] = (delta >> 8): byte; + here[2] = (delta >> 16): byte; + here[3] = (delta >> 24): byte; +} + +// Add an new fixup for the current position +addfixup(c: *assembler, l: *label) { + var f: *fixup; + var here: *byte; + + reserve(c, 4); + + here = &c.text_end.buf[c.text_end.fill]; + + emit(c, 0); + emit(c, 0); + emit(c, 0); + emit(c, 0); + + if (l.fixed) { + fixup(c, here, l.at - c.at); + } else { + f = alloc(c.a, sizeof(*f)): *fixup; + + f.next = l.fix; + f.ptr = here; + f.at = c.at; + + l.fix = f; + } +} + +// Fix references to a label to the current position +fixup_label(c: *assembler, l: *label) { + var f: *fixup; + + if (l.fixed) { + die("already fixed"); + } + + l.at = c.at; + l.fixed = 1; + + f = l.fix; + loop { + if (!f) { + break; + } + fixup(c, f.ptr, l.at - f.at); + f = f.next; + } +} + +emit_ptr(c: *assembler, l: *label) { + // lea %rax, [l] + emit(c, 0x48); + emit(c, 0x8d); + emit(c, 0x05); + addfixup(c, l); + // push %rax + emit(c, 0x50); +} + +emit_jmp(c: *assembler, l: *label) { + // jmp l + emit(c, 0xe9); + addfixup(c, l); +} + +emit_num(c: *assembler, x: int) { + // push x + emit(c, 0x68); + emit(c, x); + emit(c, x >> 8); + emit(c, x >> 16); + emit(c, x >> 24); +} + +emit_str(c: *assembler, s: *byte) { + var a: *label; + var b: *label; + var i: int; + + a = mklabel(c); + b = mklabel(c); + + // jmp b + emit_jmp(c, b); + + // a: + fixup_label(c, a); + + i = 0; + // .string s + loop { + if (!s[i]) { + break; + } + emit(c, s[i]:int); + i = i + 1; + } + emit(c, 0); + + // nop sled + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + emit(c, 0x90); + + // b: + fixup_label(c, b); + // push a + emit_ptr(c, a); +} + +emit_pop(c: *assembler, n: int) { + n = n * 8; + // add rsp, 8*n + emit(c, 0x48); + emit(c, 0x81); + emit(c, 0xc4); + emit(c, n); + emit(c, n >> 8); + emit(c, n >> 16); + emit(c, n >> 24); +} + +emit_preamble(c: *assembler, n: int, start: int) { + var i: int; + if (start) { + // xor rbp, rbp + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xed); + // mov rdi, [rsp] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x3c); + emit(c, 0x24); + // lea rsi, [rsp + 8] + emit(c, 0x48); + emit(c, 0x8d); + emit(c, 0x74); + emit(c, 0x24); + emit(c, 0x08); + // lea rdx, [rsi + rdi * 8 + 8] + emit(c, 0x48); + emit(c, 0x8d); + emit(c, 0x54); + emit(c, 0xfe); + emit(c, 0x08); + // push rdx + emit(c, 0x52); + // push rsi + emit(c, 0x56); + // push rdi + emit(c, 0x57); + // push rbp + emit(c, 0x55); + } + // push rbp + emit(c, 0x55); + // mov rbp, rsp + emit(c, 0x48); + emit(c, 0x89); + emit(c, 0xe5); + i = 0; + loop { + if (i >= n) { + break; + } + emit_num(c, 0); + i = i + 8; + } +} + +emit_store(c: *assembler, t: *type) { + // pop rdi + emit(c, 0x5f); + // pop rax + emit(c, 0x58); + if (t.kind == TY_BYTE) { + // mov [rdi], al + emit(c, 0x88); + emit(c, 0x07); + } else if (type_isprim(t)) { + // mov [rdi], rax + emit(c, 0x48); + emit(c, 0x89); + emit(c, 0x07); + } else { + die("invalid store"); + } + // push rax + emit(c, 0x50); +} + +emit_load(c: *assembler, t: *type) { + // pop rdi + emit(c, 0x5f); + if (t.kind == TY_BYTE) { + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // mov al, [rdi] + emit(c, 0x8a); + emit(c, 0x07); + } else if (type_isprim(t)) { + // mov rax, [rdi] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x07); + } else { + die("invalid load"); + } + // push rax + emit(c, 0x50); +} + +emit_jz(c: *assembler, l: *label) { + // pop rax + emit(c, 0x58); + // test rax, rax + emit(c, 0x48); + emit(c, 0x85); + emit(c, 0xc0); + // jz no + emit(c, 0x0f); + emit(c, 0x84); + addfixup(c, l); +} + +emit_lea(c: *assembler, offset: int) { + // lea rax, [rbp + offset] + emit(c, 0x48); + emit(c, 0x8d); + emit(c, 0x85); + emit(c, offset); + emit(c, offset >> 8); + emit(c, offset >> 16); + emit(c, offset >> 24); + // push rax + emit(c, 0x50); +} + +emit_and(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rdx + emit(c, 0x5a); + // and rdx, rax + emit(c, 0x48); + emit(c, 0x21); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_or(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rdx + emit(c, 0x5a); + // or rdx, rax + emit(c, 0x48); + emit(c, 0x09); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_xor(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rdx + emit(c, 0x5a); + // xor rdx, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_add(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rdx + emit(c, 0x5a); + // add rdx, rax + emit(c, 0x48); + emit(c, 0x01); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_ret(c: *assembler) { + // pop rax + emit(c, 0x58); + // mov rsp, rbp + emit(c, 0x48); + emit(c, 0x89); + emit(c, 0xec); + // pop rbp + emit(c, 0x5d); + // ret + emit(c, 0xc3); +} + +emit_call(c: *assembler, n: int) { + // pop rax + emit(c, 0x58); + // call rax + emit(c, 0xff); + emit(c, 0xd0); + // add rsp, 8*(n+1) + emit_pop(c, n); + // push rax + emit(c, 0x50); +} + +emit_gt(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // setg al + emit(c, 0x0f); + emit(c, 0x9f); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_lt(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // setl al + emit(c, 0x0f); + emit(c, 0x9c); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_ge(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // setge al + emit(c, 0x0f); + emit(c, 0x9d); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_le(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // setle al + emit(c, 0x0f); + emit(c, 0x9e); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_eq(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // sete al + emit(c, 0x0f); + emit(c, 0x94); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_ne(c: *assembler) { + // pop rdx + emit(c, 0x5a); + // pop rcx + emit(c, 0x59); + // xor rax, rax + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xc0); + // cmp rdx, rcx + emit(c, 0x48); + emit(c, 0x39); + emit(c, 0xca); + // setne al + emit(c, 0x0f); + emit(c, 0x95); + emit(c, 0xc0); + // mov rax + emit(c, 0x50); +} + +emit_sub(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rdx + emit(c, 0x5a); + // add rax, rdx + emit(c, 0x48); + emit(c, 0x29); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_mul(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rcx + emit(c, 0x59); + // mul rcx + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xe1); + // push rax + emit(c, 0x50); +} + +emit_div(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rcx + emit(c, 0x59); + // xor rdx, rdx + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xd2); + // test rax, rax + emit(c, 0x48); + emit(c, 0x85); + emit(c, 0xc0); + // sets dl + emit(c, 0x0f); + emit(c, 0x98); + emit(c, 0xc2); + // neg rdx + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xda); + // idiv rcx + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xf9); + // push rax + emit(c, 0x50); +} + +emit_mod(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rcx + emit(c, 0x59); + // xor rdx, rdx + emit(c, 0x48); + emit(c, 0x31); + emit(c, 0xd2); + // test rax, rax + emit(c, 0x48); + emit(c, 0x85); + emit(c, 0xc0); + // sets dl + emit(c, 0x0f); + emit(c, 0x98); + emit(c, 0xc2); + // neg rdx + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xda); + // idiv rcx + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xf9); + // push rdx + emit(c, 0x52); +} + +emit_lsh(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rcx + emit(c, 0x59); + // shl rax, cl + emit(c, 0x48); + emit(c, 0xd3); + emit(c, 0xe0); + // push rax + emit(c, 0x50); +} + +emit_rsh(c: *assembler) { + // pop rax + emit(c, 0x58); + // pop rcx + emit(c, 0x59); + // shr rax, cl + emit(c, 0x48); + emit(c, 0xd3); + emit(c, 0xe8); + // push rax + emit(c, 0x50); +} + +emit_not(c: *assembler) { + // pop rax + emit(c, 0x58); + // neg rax + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xd0); + // push rax + emit(c, 0x50); +} + +emit_neg(c: *assembler) { + // pop rax + emit(c, 0x58); + // neg rax + emit(c, 0x48); + emit(c, 0xf7); + emit(c, 0xd8); + // push rax + emit(c, 0x50); +} + +emit_syscall(c: *assembler) { + // mov rax, [rbp + 16] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x45); + emit(c, 0x10); + // mov rdi, [rbp + 24] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x7d); + emit(c, 0x18); + // mov rsi, [rbp + 32] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x75); + emit(c, 0x20); + // mov rdx, [rbp + 40] + emit(c, 0x48); + emit(c, 0x8b); + emit(c, 0x55); + emit(c, 0x28); + // mov r10, [rbp + 48] + emit(c, 0x4c); + emit(c, 0x8b); + emit(c, 0x55); + emit(c, 0x30); + // mov r8, [rbp + 56] + emit(c, 0x4c); + emit(c, 0x8b); + emit(c, 0x45); + emit(c, 0x38); + // mov r9, [rbp + 64] + emit(c, 0x4c); + emit(c, 0x8b); + emit(c, 0x4d); + emit(c, 0x40); + // syscall + emit(c, 0x0f); + emit(c, 0x05); + // push rax + emit(c, 0x50); +} + +writeout(c: *assembler, start: *label) { + var b: *chunk; + var i: int; + var text_size: int; + var load_addr: int; + var entry: int; + + load_addr = 0x100000; + text_size = c.at; + + if (!start || !start.fixed) { + die("_start is not defined"); + } + + entry = load_addr + start.at + 128; + text_size = text_size + 128; + + // magic + putchar(c, 0x7f); + putchar(c, 'E'); + putchar(c, 'L'); + putchar(c, 'F'); + + // class + putchar(c, 2); + + // endian + putchar(c, 1); + + // version + putchar(c, 1); + + // abi + putchar(c, 0); + + // abi version + putchar(c, 0); + + // padding + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // type + putchar(c, 2); + putchar(c, 0); + + // machine + putchar(c, 62); + putchar(c, 0); + + // version + putchar(c, 1); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // entry point + putchar(c, entry); + putchar(c, entry >> 8); + putchar(c, entry >> 16); + putchar(c, entry >> 24); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phoff + putchar(c, 64); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // shoff + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // flags + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // ehsize + putchar(c, 64); + putchar(c, 0); + + // phentsize + putchar(c, 56); + putchar(c, 0); + + // phnum + putchar(c, 1); + putchar(c, 0); + + // shentsize + putchar(c, 64); + putchar(c, 0); + + // shnum + putchar(c, 0); + putchar(c, 0); + + // shstrndx + putchar(c, 0); + putchar(c, 0); + + // phdr[0].type + putchar(c, 1); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].flags + putchar(c, 5); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].offset + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].vaddr + putchar(c, 0); + putchar(c, 0); + putchar(c, 0x10); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].paddr + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].filesize + putchar(c, text_size); + putchar(c, text_size >> 8); + putchar(c, text_size >> 16); + putchar(c, text_size >> 24); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].memsize + putchar(c, text_size); + putchar(c, text_size >> 8); + putchar(c, text_size >> 16); + putchar(c, text_size >> 24); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // phdr[0].align + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + putchar(c, 0); + + // nop sled + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + putchar(c, 0x90); + + b = c.text; + loop { + if (!b) { + break; + } + i = 0; + loop { + if (i >= b.fill) { + break; + } + putchar(c, b.buf[i]: int); + i = i + 1; + } + b = b.next; + } +} diff --git a/buildlex.sh b/buildlex.sh @@ -1,10 +0,0 @@ -#!/bin/bash - -set -ue - -: ${CC:=./cc2} - -${CC} < lex.c > lex -chmod +x lex - -./lex < cc3.l diff --git a/cc0.c b/cc0.c @@ -1999,7 +1999,7 @@ decl(void) // program := func // | func program struct node * -program(void) +program(struct node *p) { struct node *n; struct node *e; @@ -2007,7 +2007,7 @@ program(void) d = decl(); if (!d) { - return 0; + return p; } e = mknode(N_PROGRAM, d, 0); @@ -2020,6 +2020,7 @@ program(void) die("expected eof"); } + e->b = p; return n; } @@ -4344,6 +4345,14 @@ add_stdlib(void) // syscall emit(0x0f); emit(0x05); + // push rax + emit(0x50); + // pop rax + emit(0x58); + // mov rsp, rbp + emit(0x48); + emit(0x89); + emit(0xec); // pop rbp emit(0x5d); // ret @@ -4795,6 +4804,7 @@ main(int argc, char **argv) // Setup the compiler setup(); + p = 0; i = 1; while (1) { if (i >= argc) { @@ -4816,19 +4826,19 @@ main(int argc, char **argv) open_source((unsigned char *)argv[i]); // Parse the program - p = program(); - - // Verify types - typecheck(p); - - // Generate code - translate(p); + p = program(p); close_source(); i = i + 1; } + // Verify types + typecheck(p); + + // Generate code + translate(p); + if (fout == stdout) { open_output((unsigned char *)"a.out"); } diff --git a/cc1.c b/cc1.c @@ -1,49 +1,3 @@ -syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int; - -struct page { - ptr: *byte; - fill: int; - size: int; -} - -struct node { - kind: int; - a: *node; - b: *node; - filename: *byte; - lineno: int; - colno: int; - n: int; - s: *byte; - t: *type; -} - -struct type { - kind: int; - st: *decl; - val: *type; - arg: *type; -} - -struct fixup { - next: *fixup; - ptr: *byte; - at: int; -} - -struct label { - fix: *fixup; - at: int; - fixed: int; -} - -struct chunk { - next: *chunk; - buf: *byte; - fill: int; - cap: int; -} - struct decl { name: *byte; member_name: *byte; @@ -81,7 +35,7 @@ struct decl { struct compiler { // Allocator - page: *page; + a: *alloc; // Lexer fdin: int; @@ -95,2178 +49,54 @@ struct compiler { tmax: int; // Assembler - fdout: int; - at: int; - text: *chunk; - text_end: *chunk; + as: *assembler; // Namespace decls: *decl; } -enum { - T_EOF, - T_IDENT, - T_NUM, - T_HEX, - T_STR, - T_CHAR, - T_LPAR, - T_RPAR, - T_LBRA, - T_RBRA, - T_COMMA, - T_SEMI, - T_COLON, - T_STAR, - T_DOT, - T_NOT, - T_ASSIGN, - T_AMP, - T_OR, - T_XOR, - T_LT, - T_GT, - T_LE, - T_GE, - T_EQ, - T_NE, - T_ADD, - T_SUB, - T_LSH, - T_RSH, - T_BANG, - T_BOR, - T_BAND, - T_LSQ, - T_RSQ, - T_DIV, - T_MOD, -} - -enum { - N_IDENT, - N_NUM, - N_CHAR, - N_STR, - N_STMTLIST, - N_EXPRLIST, - N_CALL, - N_DOT, - N_ARGLIST, - N_FUNC, - N_ARGDECL, - N_FUNCDECL, - N_PROGRAM, - N_FUNCTYPE, - N_PTRTYPE, - N_STRUCT, - N_MEMBERDECL, - N_MEMBERLIST, - N_CONDLIST, - N_COND, - N_ENUM, - N_ENUMLIST, - N_LOOP, - N_BREAK, - N_CONTINUE, - N_RETURN, - N_VARDECL, - N_LABEL, - N_GOTO, - N_ASSIGN, - N_SIZEOF, - N_REF, - N_DEREF, - N_CAST, - N_INDEX, - N_LT, - N_GT, - N_LE, - N_GE, - N_EQ, - N_NE, - N_ADD, - N_SUB, - N_MUL, - N_LSH, - N_RSH, - N_BNOT, - N_BOR, - N_BAND, - N_AND, - N_OR, - N_XOR, - N_NOT, - N_POS, - N_NEG, - N_DIV, - N_MOD, -} - -enum { - TY_VOID, - TY_INT, - TY_BYTE, - TY_PTR, - TY_ARG, - TY_FUNC, - TY_STRUCT, -} - -exit(n: int) { - syscall(60, n, 0, 0, 0, 0, 0); -} - -_start(argc: int, argv: **byte, envp: **byte) { - main(argc, argv, envp); - exit(0); -} - -read(fd: int, buf: *byte, n: int): int { - return syscall(0, fd, buf: int, n, 0, 0, 0); -} - -write(fd: int, buf: *byte, n: int): int { - return syscall(1, fd, buf: int, n, 0, 0, 0); -} - -open(name: *byte, flags: int, mode: int): int { - return syscall(2, name: int, flags, mode, 0, 0, 0); -} - -lseek(fd: int, offset: int, whence: int): int { - return syscall(8, fd, offset, whence, 0, 0, 0); -} - -close(fd: int): int { - return syscall(3, fd, 0, 0, 0, 0, 0); -} - -fchmod(fd: int, mode: int): int { - return syscall(91, fd, mode, 0, 0, 0, 0); -} - -rename(oldname: *byte, newname: *byte): int { - return syscall(82, oldname: int, newname: int, 0, 0, 0, 0); -} - -unlink(name: *byte): int { - return syscall(87, name: int, 0, 0, 0, 0, 0); -} - -mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int { - return syscall(9, addr, len, prot, flags, fd, off); -} - -alloc(c: *compiler, size: int): *byte { - var page: *page; - var mret: int; - var ret: *byte; - var psize: int; - - if (size < 0) { - die(c, "invalid alloc"); - } - - if (size >= 2048) { - size = size + 4095; - size = size & ~4095; - mret = mmap(0, size, 3, 0x22, -1, 0); - if (mret == -1) { - die(c, "out of memory"); - } - ret = mret: *byte; - return ret; - } - - page = c.page; - if (page) { - if (size <= page.size - page.fill) { - mret = page.ptr:int + page.fill; - page.fill = page.fill + size; - ret = mret: *byte; - return ret; - } - } - - psize = 64 * 1024; - - mret = mmap(0, psize, 3, 0x22, -1, 0); - if (mret == -1) { - die(c, "out of memory"); - } - - page = mret: *page; - page.ptr = (&page[1]): *byte; - ret = page.ptr; - page.size = psize - sizeof(*page); - page.fill = size; - - c.page = page; - - return ret; -} - -getchar(c: *compiler): int { - var b: byte; - var ret: int; - ret = read(c.fdin, &b, 1); - if (ret < 0) { - exit(3); - } - if (ret == 0) { - return -1; - } - return b: int; -} - -putchar(c: *compiler, ch: int) { - var b: byte; - var ret: int; - b = ch: byte; - ret = write(c.fdout, &b, 1); - if (ret != 1) { - exit(3); - } -} - -strlen(s: *byte): int { - var ret: int; - ret = 0; - loop { - if (s[ret] == 0:byte) { - return ret; - } - ret = ret + 1; - } -} - -strcmp(a: *byte, b: *byte): int { - var i: int; - - i = 0; - - loop { - if (a[i] > b[i]) { - return 1; - } - - if (a[i] < b[i]) { - return -1; - } - - if (a[i] == 0:byte) { - return 0; - } - - i = i + 1; - } -} - -fdputc(fd: int, ch: int) { - var b: byte; - var ret: int; - b = ch: byte; - ret = write(fd, &b, 1); - if (ret != 1) { - exit(3); - } -} - -fdput(fd: int, msg: *byte) { - var len: int; - var ret: int; - var off: int; - len = strlen(msg); - off = 0; - loop { - if (off == len) { - break; - } - ret = write(fd, msg, len - off); - if (ret < 0) { - exit(3); - } - off = off + ret; - } -} - -fdputh(fd: int, n: int) { - var c: int; - var r: int; - - r = 0; - loop { - if (n == 0) { - break; - } - - r = (r << 4) + (n & 15); - n = n >> 4; - } - - n = r; - - loop { - c = n & 15; - n = n >> 4; - - if (c < 10) { - fdputc(fd, c + '0'); - } else { - fdputc(fd, c + ('a' - 10)); - } - - if (n == 0) { - break; - } - } -} - -fdputd(fd: int, n: int) { - var a: int; - - if (n < 0) { - fdputc(fd, '-'); - a = -(n % 10); - n = n / -10; - } else { - a = n % 10; - n = n / 10; - } - - if (n != 0) { - fdputd(fd, n); - } - - fdputc(fd, '0' + a); -} - show_context(c: *compiler) { - fdput(2, "on "); + fdputs(2, "on "); if (c.filename) { - fdput(2, c.filename); + fdputs(2, c.filename); } - fdput(2, ":"); + fdputs(2, ":"); fdputd(2, c.lineno); - fdput(2, ":"); + fdputs(2, ":"); fdputd(2, c.colno); - fdput(2, "\n"); -} - -die(c: *compiler, msg: *byte) { - show_context(c); - fdput(2, "die: "); - fdput(2, msg); - fdput(2, "\n"); - exit(1); -} - -open_output(c: *compiler, filename: *byte) { - var fd: int; - - c.filename = filename; - - if (c.fdout != 1) { - die(c, "multiple output files"); - } - - unlink(filename); - - fd = open(filename, 64 + 1, (7 << 6) + (7 << 3) + 7); - if (fd < 0) { - die(c, "failed to open output"); - } - - c.fdout = fd; -} - -open_source(c: *compiler, filename: *byte) { - var fd: int; - - c.filename = filename; - c.nc = 0; - c.lineno = 1; - c.colno = 1; - c.tlen = 0; - c.tt = 0; - - fd = open(filename, 0, 0); - if (fd < 0) { - die(c, "failed to open file"); - } - - c.fdin = fd; - c.nc = getchar(c); - - feed(c); -} - -close_source(c: *compiler) { - if (c.fdin != 0) { - close(c.fdin); - } - c.fdin = 0; -} - -comp_setup(c: *compiler) { - c.page = 0:*page; - - c.fdin = 0; - c.nc = 0; - c.filename = 0:*byte; - c.lineno = 1; - c.colno = 1; - c.tlen = 0; - c.tmax = 4096; - c.token = alloc(c, c.tmax); - c.tt = 0; - - c.fdout = 1; - c.at = 0; - c.text = 0:*chunk; - c.text_end = 0:*chunk; - - c.decls = 0:*decl; -} - -feedc(c: *compiler) { - c.nc = getchar(c); - if (c.nc == '\n') { - c.lineno = c.lineno + 1; - c.colno = 0; - } - c.colno = c.colno + 1; -} - -tappend(c: *compiler) { - c.token[c.tlen] = c.nc:byte; - c.tlen = c.tlen + 1; - if (c.tlen == c.tmax) { - die(c, "identifier too long"); - } - c.token[c.tlen] = 0:byte; - feedc(c); -} - -feed(c: *compiler) { - c.tlen = 0; - c.token[0] = 0:byte; - - loop { - if (c.nc == -1) { - // Reached the end of input - c.tt = T_EOF; - return; - } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r') { - // Whitespace - feedc(c); - } else if (c.nc == '\n') { - // Line end - feedc(c); - } else if (c.nc == '/') { - // Comment - feedc(c); - if (c.nc == '/') { - // Read until the end of the comment - loop { - if (c.nc == '\n' || c.nc == -1) { - break; - } - feedc(c); - } - } else { - c.tt = T_DIV; - return; - } - - } else { - // Start of a real token - break; - } - } - - // Identifier - if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') { - feed_ident(c); - return; - } - - // String - if (c.nc == '"') { - feed_str(c); - return; - } - - // Character - if (c.nc == '\'') { - feed_char(c); - return; - } - - // Number - if (c.nc >= '0' && c.nc <= '9') { - feed_num(c); - return; - } - - // Single character tokens - if (c.nc == '(') { - c.tt = T_LPAR; - feedc(c); - } else if (c.nc == ')') { - c.tt = T_RPAR; - feedc(c); - } else if (c.nc == '{') { - c.tt = T_LBRA; - feedc(c); - } else if (c.nc == '}') { - c.tt = T_RBRA; - feedc(c); - } else if (c.nc == ',') { - c.tt = T_COMMA; - feedc(c); - } else if (c.nc == ';') { - c.tt = T_SEMI; - feedc(c); - } else if (c.nc == ':') { - c.tt = T_COLON; - feedc(c); - } else if (c.nc == '*') { - c.tt = T_STAR; - feedc(c); - } else if (c.nc == '.') { - c.tt = T_DOT; - feedc(c); - } else if (c.nc == '=') { - c.tt = T_ASSIGN; - feedc(c); - if (c.nc == '=') { - c.tt = T_EQ; - feedc(c); - } - } else if (c.nc == '&') { - c.tt = T_AMP; - feedc(c); - if (c.nc == '&') { - c.tt = T_BAND; - feedc(c); - } - } else if (c.nc == '~') { - c.tt = T_NOT; - feedc(c); - } else if (c.nc == '|') { - c.tt = T_OR; - feedc(c); - if (c.nc == '|') { - c.tt = T_BOR; - feedc(c); - } - } else if (c.nc == '^') { - c.tt = T_XOR; - feedc(c); - } else if (c.nc == '!') { - c.tt = T_BANG; - feedc(c); - if (c.nc == '=') { - c.tt = T_NE; - feedc(c); - } - } else if (c.nc == '<') { - c.tt = T_LT; - feedc(c); - if (c.nc == '<') { - c.tt = T_LSH; - feedc(c); - } else if (c.nc == '=') { - c.tt = T_LE; - feedc(c); - } - } else if (c.nc == '>') { - c.tt = T_GT; - feedc(c); - if (c.nc == '>') { - c.tt = T_RSH; - feedc(c); - } else if (c.nc == '=') { - c.tt = T_GE; - feedc(c); - } - } else if (c.nc == '[') { - c.tt = T_LSQ; - feedc(c); - } else if (c.nc == ']') { - c.tt = T_RSQ; - feedc(c); - } else if (c.nc == '+') { - c.tt = T_ADD; - feedc(c); - } else if (c.nc == '-') { - c.tt = T_SUB; - feedc(c); - } else if (c.nc == '%') { - c.tt = T_MOD; - feedc(c); - } else { - die(c, "invalid char"); - } -} - -feed_ident(c: *compiler) { - c.tt = T_IDENT; - loop { - if (!((c.nc >= 'a' && c.nc <= 'z') || - (c.nc >= 'A' && c.nc <= 'Z') || - (c.nc >= '0' && c.nc <= '9') || - c.nc == '_')) { - break; - } - tappend(c); - } -} - -hexdig(c: *compiler): int { - if (c.nc >= '0' && c.nc <= '9') { - return c.nc - '0'; - } - - if (c.nc >= 'A' && c.nc <= 'F') { - return (c.nc - 'F') + 10; - } - - if (c.nc >= 'a' && c.nc <= 'f') { - return (c.nc - 'a') + 10; - } - - die(c, "invalid hex digit"); - return 0; -} - -feed_escape(c: *compiler) { - var hex: int; - - // backslash - feedc(c); - - if (c.nc == 't') { - c.nc = '\t'; - } else if (c.nc == 'r') { - c.nc = '\r'; - } else if (c.nc == 'n') { - c.nc = '\n'; - } else if (c.nc == 'x') { - c.nc = getchar(c); - hex = hexdig(c) * 16; - - c.nc = getchar(c); - hex = hex + hexdig(c); - - c.nc = hex; - } else if (c.nc != '\\' && c.nc != '\'' && c.nc != '"') { - die(c, "invalid escape"); - } -} - -feed_str(c: *compiler) { - c.tt = T_STR; - - // quote - feedc(c); - - loop { - if (c.nc == '"') { - feedc(c); - return; - } - - if (c.nc == -1 || c.nc == 0 || c.nc == '\n') { - die(c, "invalid char in string"); - } - - if (c.tlen == c.tmax) { - die(c, "string too long"); - } - - if (c.nc == '\\') { - feed_escape(c); - } - - tappend(c); - } -} - -feed_char(c: *compiler) { - c.tt = T_CHAR; - - // quote - feedc(c); - - if (c.nc == 0 || c.nc == -1 || c.nc == '\'' || c.nc == '\n') { - die(c, "invalid char"); - } - - if (c.nc == '\\') { - feed_escape(c); - } - - tappend(c); - - if (c.nc != '\'') { - die(c, "expected '"); - } - - feedc(c); -} - -feed_hex(c: *compiler) { - c.tt = T_HEX; - - loop { - if (!((c.nc >= '0' && c.nc <= '9') - || (c.nc >= 'a' && c.nc <= 'f') - || (c.nc >= 'A' && c.nc <= 'F'))) { - break; - } - - tappend(c); - } - - if (c.tlen == 0) { - die(c, "expected hex"); - } -} - -feed_num(c: *compiler) { - c.tt = T_NUM; - - if (c.nc == '0') { - tappend(c); - - if (c.nc == 'x') { - feedc(c); - feed_hex(c); - return; - } - } - - loop { - if (!(c.nc >= '0' && c.nc <= '9')) { - break; - } - - tappend(c); - } -} - -mknode(c: *compiler, kind: int, a: *node, b: *node): *node { - var ret: *node; - ret = alloc(c, sizeof(*ret)):*node; - ret.kind = kind; - ret.a = a; - ret.b = b; - ret.filename = c.filename; - ret.lineno = c.lineno; - ret.colno = c.colno; - ret.n = 0; - ret.s = 0:*byte; - ret.t = 0:*type; - return ret; -} - -mknode0(c: *compiler, kind: int): *node { - return mknode(c, kind, 0:*node, 0:*node); -} - -mknode1(c: *compiler, kind: int, a: *node): *node { - return mknode(c, kind, a, 0:*node); -} - -// Copy the current token -intern(c: *compiler): *byte { - var ret: *byte; - var i: int; - - ret = alloc(c, c.tlen + 1); - - i = 0; - loop { - if (i == c.tlen) { - ret[i] = 0:byte; - return ret; - } - - ret[i] = c.token[i]; - - i = i + 1; - } -} - -// ident := IDENT -parse_ident(c: *compiler): *node { - var n: *node; - - if (c.tt != T_IDENT) { - return 0:*node; - } - - n = mknode0(c, N_IDENT); - n.s = intern(c); - feed(c); - - return n; -} - -parse_hex(c: *compiler): *node { - var n: *node; - var x: int; - var d: int; - var i: int; - - x = 0; - i = 0; - loop { - if (i == c.tlen) { - break; - } - - d = c.token[i]:int; - - if (d >= '0' && d <= '9') { - d = d - '0'; - } else if (d >= 'a' && d <= 'f') { - d = (d - 'a') + 10; - } else { - d = (d - 'A') + 10; - } - - x = x * 16; - x = x + d; - i = i + 1; - - if (x > 0x7fffffff) { - die(c, "overflow"); - } - } - - n = mknode0(c, N_NUM); - n.n = x; - feed(c); - - return n; -} - -// num := NUM -parse_num(c: *compiler): *node { - var n: *node; - var x: int; - var d: int; - var i: int; - - if (c.tt == T_HEX) { - return parse_hex(c); - } - - if (c.tt != T_NUM) { - return 0:*node; - } - - x = 0; - i = 0; - loop { - if (i == c.tlen) { - break; - } - - d = (c.token[i]:int) - '0'; - - x = x * 10; - - x = x + d; - i = i + 1; - - if (x > 0x7fffffff) { - die(c, "overflow"); - } - } - - n = mknode0(c, N_NUM); - n.n = x; - feed(c); - - return n; -} - -// chr := CHAR -parse_chr(c: *compiler): *node { - var n: *node; - - if (c.tt != T_CHAR) { - return 0:*node; - } - - n = mknode0(c, N_CHAR); - n.n = c.token[0]:int; - feed(c); - - return n; -} - -// str := STR -parse_str(c: *compiler): *node { - var n: *node; - - if (c.tt != T_STR) { - return 0:*node; - } - - n = mknode0(c, N_STR); - n.s = intern(c); - feed(c); - - return n; -} - -// primary := ident -// | num -// | str -// | chr -// | '(' expr ')' -parse_primary(c: *compiler): *node { - var n: *node; - - n = parse_ident(c); - if (n) { - return n; - } - - n = parse_num(c); - if (n) { - return n; - } - - n = parse_str(c); - if (n) { - return n; - } - - n = parse_chr(c); - if (n) { - return n; - } - - if (c.tt == T_LPAR) { - feed(c); - - n = parse_expr(c); - if (!n) { - die(c, "expected expr"); - } - - if (c.tt != T_RPAR) { - die(c, "expected )"); - } - feed(c); - - return n; - } - - return 0:*node; -} - -// expr_list := expr -// | expr ',' expr_list -parse_expr_list(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - - a = parse_expr(c); - if (!a) { - return 0:*node; - } - - e = mknode1(c, N_EXPRLIST, a); - n = e; - - loop { - if (c.tt != T_COMMA) { - return n; - } - feed(c); - - a = parse_expr(c); - if (!a) { - die(c, "expected expr"); - } - - e.b = mknode1(c, N_EXPRLIST, a); - e = e.b; - } -} - -// post_expr := primary -// | 'sizeof' '(' expr ')' -// | post_expr '[' expr ']' -// | post_expr '(' expr_list ')' -// | post_expr '.' ident -// | post_expr ':' type -parse_post_expr(c: *compiler): *node { - var n: *node; - var b: *node; - - if (c.tt == T_IDENT && !strcmp(c.token, "sizeof")) { - feed(c); - - if (c.tt != T_LPAR) { - die(c, "expected ("); - } - feed(c); - - n = parse_expr(c); - if (!n) { - die(c, "expected expr"); - } - - if (c.tt != T_RPAR) { - die(c, "expected )"); - } - feed(c); - - return mknode1(c, N_SIZEOF, n); - } - - n = parse_primary(c); - if (!n) { - return 0:*node; - } - - loop { - if (c.tt == T_LSQ) { - feed(c); - - b = parse_expr(c); - - if (c.tt != T_RSQ) { - die(c, "expected ]"); - } - feed(c); - - n = mknode(c, N_INDEX, n, b); - } else if (c.tt == T_LPAR) { - feed(c); - - b = parse_expr_list(c); - - if (c.tt != T_RPAR) { - die(c, "expected )"); - } - feed(c); - - n = mknode(c, N_CALL, n, b); - } else if (c.tt == T_DOT) { - feed(c); - - b = parse_ident(c); - if (!b) { - die(c, "expected ident"); - } - - n = mknode(c, N_DOT, n, b); - } else if (c.tt == T_COLON) { - feed(c); - - b = parse_type(c); - if (!b) { - die(c, "expected type"); - } - - n = mknode(c, N_CAST, n, b); - } else { - return n; - } - } -} - -// unary_expr := post_expr -// | '&' unary_expr -// | '*' unary_expr -// | '+' unary_expr -// | '-' unary_expr -// | '~' unary_expr -// | '!' unary_expr -parse_unary_expr(c: *compiler): *node { - var n: *node; - - if (c.tt == T_AMP) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_REF, n); - } - - if (c.tt == T_STAR) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_DEREF, n); - } - - if (c.tt == T_ADD) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_POS, n); - } - - if (c.tt == T_SUB) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_NEG, n); - } - - if (c.tt == T_NOT) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_NOT, n); - } - - if (c.tt == T_BANG) { - feed(c); - - n = parse_unary_expr(c); - if (!n) { - die(c, "expected unary_expr"); - } - - return mknode1(c, N_BNOT, n); - } - - return parse_post_expr(c); -} - - -// shift_expr := unary_expr -// | shift_expr '<<' unary_expr -// | shift_expr '>>' unary_expr -parse_shift_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_unary_expr(c); - if (!a) { - return 0:*node; - } - - loop { - if (c.tt == T_LSH) { - feed(c); - - b = parse_unary_expr(c); - if (!b) { - die(c, "expected unary_expr"); - } - - a = mknode(c, N_LSH, a, b); - } else if (c.tt == T_RSH) { - feed(c); - - b = parse_unary_expr(c); - if (!b) { - die(c, "expected unary_expr"); - } - - a = mknode(c, N_RSH, a, b); - } else { - return a; - } - } -} - -// mul_expr := shift_expr -// | mul_expr '*' shift_expr -// | mul_expr '/' shift_expr -// | mul_expr '%' shift_expr -// | mul_expr '&' shift_expr -parse_mul_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_shift_expr(c); - if (!a) { - return 0:*node; - } - - loop { - if (c.tt == T_STAR) { - feed(c); - - b = parse_shift_expr(c); - if (!b) { - die(c, "expected shift_expr"); - } - - a = mknode(c, N_MUL, a, b); - } else if (c.tt == T_DIV) { - feed(c); - - b = parse_shift_expr(c); - if (!b) { - die(c, "expected shift_expr"); - } - - a = mknode(c, N_DIV, a, b); - } else if (c.tt == T_MOD) { - feed(c); - - b = parse_shift_expr(c); - if (!b) { - die(c, "expected shift_expr"); - } - - a = mknode(c, N_MOD, a, b); - } else if (c.tt == T_AMP) { - feed(c); - - b = parse_shift_expr(c); - if (!b) { - die(c, "expected shift_expr"); - } - - a = mknode(c, N_AND, a, b); - } else { - return a; - } - } -} - -// add_expr := mul_expr -// | add_expr '+' mul_expr -// | add_expr '-' mul_expr -// | add_expr '|' mul_expr -// | add_expr '^' mul_expr -parse_add_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_mul_expr(c); - if (!a) { - return 0:*node; - } - - loop { - if (c.tt == T_ADD) { - feed(c); - - b = parse_mul_expr(c); - if (!b) { - die(c, "expected mul_expr"); - } - - a = mknode(c, N_ADD, a, b); - } else if (c.tt == T_SUB) { - feed(c); - - b = parse_mul_expr(c); - if (!b) { - die(c, "expected mul_expr"); - } - - a = mknode(c, N_SUB, a, b); - } else if (c.tt == T_OR) { - feed(c); - - b = parse_mul_expr(c); - if (!b) { - die(c, "expected mul_expr"); - } - - a = mknode(c, N_OR, a, b); - } else if (c.tt == T_XOR) { - feed(c); - - b = parse_mul_expr(c); - if (!b) { - die(c, "expected mul_expr"); - } - - a = mknode(c, N_XOR, a, b); - } else { - return a; - } - } -} - -// comp_expr := add_expr -// | add_expr '<' add_expr -// | add_expr '>' add_expr -// | add_expr '<=' add_expr -// | add_expr '>=' add_expr -// | add_expr '==' add_expr -// | add_expr '!=' add_expr -parse_comp_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_add_expr(c); - if (!a) { - return 0:*node; - } - - if (c.tt == T_LT) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_LT, a, b); - } - - if (c.tt == T_GT) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_GT, a, b); - } - - if (c.tt == T_LE) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_LE, a, b); - } - - if (c.tt == T_GE) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_GE, a, b); - } - - if (c.tt == T_EQ) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_EQ, a, b); - } - - if (c.tt == T_NE) { - feed(c); - - b = parse_add_expr(c); - if (!b) { - die(c, "expected add_expr"); - } - - return mknode(c, N_NE, a, b); - } - - return a; -} - -// bool_expr := bool_expr -// | bool_expr '&&' comp_expr -// | bool_expr '||' comp_expr -parse_bool_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_comp_expr(c); - if (!a) { - return 0:*node; - } - - if (c.tt == T_BAND) { - feed(c); - - b = parse_bool_expr(c); - if (!b) { - die(c, "expected bool_expr"); - } - - return mknode(c, N_BAND, a, b); - } - - if (c.tt == T_BOR) { - feed(c); - - b = parse_bool_expr(c); - if (!b) { - die(c, "expected bool_expr"); - } - - return mknode(c, N_BOR, a, b); - } - - return a; -} - -// expr := bool_expr -// | bool_expr '=' expr -parse_expr(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_bool_expr(c); - if (!a) { - return 0:*node; - } - - if (c.tt == T_ASSIGN) { - feed(c); - - b = parse_expr(c); - if (!b) { - die(c, "expected expr"); - } - - return mknode(c, N_ASSIGN, a, b); - } - - return a; -} - -// if_stmt := 'if' expr '{' stmt_list '}' -// | 'if' expr '{' stmt_list '}' 'else' if_stmt -// | 'if' expr '{' stmt_list '}' 'else' '{' stmt_list '}' -parse_if_stmt(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - var b: *node; - - if (c.tt != T_IDENT || strcmp(c.token, "if")) { - return 0:*node; - } - feed(c); - - n = mknode0(c, N_CONDLIST); - e = n; - - loop { - a = parse_expr(c); - if !a { - die(c, "expected expr"); - } - - if (c.tt != T_LBRA) { - die(c, "expected {"); - } - feed(c); - - b = parse_stmt_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - e.a = mknode(c, N_COND, a, b); - - if (c.tt != T_IDENT || strcmp(c.token, "else")) { - return n; - } - feed(c); - - if (c.tt == T_LBRA) { - feed(c); - - b = parse_stmt_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - e.b = mknode1(c, N_CONDLIST, mknode(c, N_COND, 0:*node, b)); - - return n; - } - - if (c.tt != T_IDENT || strcmp(c.token, "if")) { - die(c, "expected if"); - } - feed(c); - - e.b = mknode0(c, N_CONDLIST); - e = e.b; - } -} - -// loop_stmt := 'loop' '{' stmt_list '}' -parse_loop_stmt(c: *compiler): *node { - var a: *node; - - if (c.tt != T_IDENT || strcmp(c.token, "loop")) { - return 0:*node; - } - feed(c); - - if (c.tt != T_LBRA) { - die(c, "expected {"); - } - feed(c); - - a = parse_stmt_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - return mknode1(c, N_LOOP, a); -} - -// break_stmt := 'break' -parse_break_stmt(c: *compiler): *node { - if (c.tt != T_IDENT || strcmp(c.token, "break")) { - return 0:*node; - } - feed(c); - - return mknode0(c, N_BREAK); -} - -// continue_stmt := 'continue' -parse_continue_stmt(c: *compiler): *node { - if (c.tt != T_IDENT || strcmp(c.token, "continue")) { - return 0:*node; - } - feed(c); - - return mknode0(c, N_CONTINUE); -} - -// return_stmt := 'return' -// | 'return' expr -parse_return_stmt(c: *compiler): *node { - var a: *node; - - if (c.tt != T_IDENT || strcmp(c.token, "return")) { - return 0:*node; - } - feed(c); - - a = parse_expr(c); - - return mknode1(c, N_RETURN, a); -} - -// var_decl := ident ':' type -parse_var_decl(c: *compiler): *node { - var n: *node; - var a: *node; - var b: *node; - - a = parse_ident(c); - if (!a) { - return 0:*node; - } - - if (c.tt != T_COLON) { - die(c, "expected :"); - } - feed(c); - - b = parse_type(c); - if (!b) { - die(c, "expected type"); - } - - return mknode(c, N_VARDECL, a, b); -} - -// var_stmt := 'var' var_decl -parse_var_stmt(c: *compiler): *node { - var a: *node; - - if (c.tt != T_IDENT || strcmp(c.token, "var")) { - return 0:*node; - } - feed(c); - - a = parse_var_decl(c); - if (!a) { - die(c, "expected var_decl"); - } - - return a; -} - -// label_stmt := ':' ident -parse_label_stmt(c: *compiler): *node { - var a: *node; - - if (c.tt != T_COLON) { - return 0:*node; - } - feed(c); - - a = parse_ident(c); - if (!a) { - die(c, "expected ident"); - } - - return mknode1(c, N_LABEL, a); -} - -// goto_stmt := 'goto' ident -parse_goto_stmt(c: *compiler): *node { - var a: *node; - - if (c.tt != T_IDENT || strcmp(c.token, "goto")) { - return 0:*node; - } - feed(c); - - a = parse_ident(c); - if (!a) { - die(c, "expected ident"); - } - - return mknode1(c, N_GOTO, a); -} - -// stmt := if_stmt -// | loop_stmt -// | break_stmt ';' -// | continue_stmt ';' -// | return_stmt ';' -// | var_stmt ';' -// | label_stmt ';' -// | goto_stmt ';' -// | expr ';' -parse_stmt(c: *compiler): *node { - var n: *node; - - n = parse_if_stmt(c); - if (n) { - return n; - } - - n = parse_loop_stmt(c); - if (n) { - return n; - } - - n = parse_break_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_return_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_continue_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_var_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_label_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_goto_stmt(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - n = parse_expr(c); - if (n) { - if (c.tt != T_SEMI) { - die(c, "expected ;"); - } - feed(c); - - return n; - } - - return 0:*node; -} - -// stmt_list := stmt -// | stmt stmt_list -parse_stmt_list(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - - a = parse_stmt(c); - if (!a) { - return 0:*node; - } - - e = mknode1(c, N_STMTLIST, a); - n = e; - - loop { - a = parse_stmt(c); - if (!a) { - return n; - } - - e.b = mknode1(c, N_STMTLIST, a); - e = e.b; - } -} - -// enum_list := ident -// | enum_list ',' enum_list -parse_enum_list(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - - a = parse_ident(c); - if (!a) { - return 0:*node; - } - - e = mknode1(c, N_ENUMLIST, a); - n = e; - - loop { - if (c.tt != T_COMMA) { - return n; - } - feed(c); - - a = parse_ident(c); - if (!a) { - return n; - } - - e.b = mknode1(c, N_ENUMLIST, a); - e = e.b; - } -} - -// enum_decl := 'enum' ident '{' enum_list '}' -parse_enum_decl(c: *compiler): *node { - var b: *node; - - if (c.tt != T_IDENT) { - return 0:*node; - } - - if (strcmp(c.token, "enum")) { - return 0:*node; - } - feed(c); - - if (c.tt != T_LBRA) { - die(c, "expected {"); - } - feed(c); - - b = parse_enum_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - return mknode(c, N_ENUM, 0:*node, b); -} - -// type := ident -// | '*' type -// | '(' type ')' -// | 'func' func_type -parse_type(c: *compiler): *node { - var n: *node; - - if (c.tt == T_STAR) { - feed(c); - - n = parse_type(c); - - return mknode1(c, N_PTRTYPE, n); - } - - if (c.tt == T_LPAR) { - feed(c); - - n = parse_type(c); - - if (c.tt != T_RPAR) { - die(c, "expected )"); - } - feed(c); - - return n; - } - - if (c.tt == T_IDENT && !strcmp(c.token, "func")) { - feed(c); - - n = parse_func_type(c); - if (!n) { - die(c, "expected func_type"); - } - - return n; - } - - return parse_ident(c); -} - -// member_decl := ident ':' type -parse_member_decl(c: *compiler): *node { - var n: *node; - var a: *node; - var b: *node; - - a = parse_ident(c); - if (!a) { - return 0: *node; - } - - if (c.tt != T_COLON) { - die(c, "expected :"); - } - feed(c); - - b = parse_type(c); - if (!b) { - die(c, "expected type"); - } - - return mknode(c, N_MEMBERDECL, a, b); -} - -// member_list := member_decl -// | member_decl ';' member_list -parse_member_list(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - - a = parse_member_decl(c); - if (!a) { - return 0:*node; - } - - e = mknode1(c, N_MEMBERLIST, a); - n = e; - - loop { - if (c.tt != T_SEMI) { - return n; - } - feed(c); - - a = parse_member_decl(c); - if (!a) { - return n; - } - - e.b = mknode1(c, N_MEMBERLIST, a); - e = e.b; - } -} - -// struct_decl := 'struct' ident '{' member_list '}' -parse_struct_decl(c: *compiler): *node { - var a: *node; - var b: *node; - - if (c.tt != T_IDENT) { - return 0:*node; - } - - if (strcmp(c.token, "struct")) { - return 0:*node; - } - feed(c); - - a = parse_ident(c); - if (!a) { - die(c, "expected ident"); - } - - if (c.tt != T_LBRA) { - die(c, "expected {"); - } - feed(c); - - b = parse_member_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - return mknode(c, N_STRUCT, a, b); -} - -// arg_decl := ':' type -// ident ':' type -parse_arg_decl(c: *compiler): *node { - var n: *node; - var a: *node; - var b: *node; - - a = parse_ident(c); - if (!a) { - return 0:*node; - } - - if (c.tt != T_COLON) { - die(c, "expected :"); - } - feed(c); - - b = parse_type(c); - if (!b) { - die(c, "expected type"); - } - - return mknode(c, N_ARGDECL, a, b); -} - -// arg_list := arg_decl -// | arg_decl ',' arg_list -parse_arg_list(c: *compiler): *node { - var n: *node; - var e: *node; - var a: *node; - - a = parse_arg_decl(c); - if (!a) { - return 0:*node; - } - - e = mknode1(c, N_ARGLIST, a); - n = e; - - loop { - if (c.tt != T_COMMA) { - return n; - } - feed(c); - - a = parse_arg_decl(c); - if (!a) { - die(c, "expected identifier"); - } - - e.b = mknode1(c, N_ARGLIST, a); - e = e.b; - } -} - -// func_type := '(' ')' ':' type -// | '(' arg_list ')' ':' type -// | '(' ')' -// | '(' arg_list ')' -parse_func_type(c: *compiler): *node { - var a: *node; - var b: *node; - - if (c.tt != T_LPAR) { - return 0: *node; - } - feed(c); - - a = parse_arg_list(c); - - if (c.tt != T_RPAR) { - die(c, "expected )"); - } - feed(c); - - if (c.tt != T_COLON) { - return mknode1(c, N_FUNCTYPE, a); - } - feed(c); - - b = parse_type(c); - if (!b) { - die(c, "expected type"); - } - - return mknode(c, N_FUNCTYPE, a, b); -} - -// func_decl := ident func_type -parse_func_decl(c: *compiler): *node { - var a: *node; - var b: *node; - - a = parse_ident(c); - if (!a) { - return 0:*node; - } - - b = parse_func_type(c); - if (!b) { - die(c, "expected func_type"); - } - - return mknode(c, N_FUNCDECL, a, b); -} - -// func := func_decl '{' stmt_list '}' -// | func_decl ';' -parse_func(c: *compiler): *node { - var n: *node; - var a: *node; - var b: *node; - - a = parse_func_decl(c); - if (!a) { - return 0:*node; - } - - if (c.tt == T_SEMI) { - feed(c); - return a; - } - - if (c.tt != T_LBRA) { - die(c, "expected {"); - } - feed(c); - - b = parse_stmt_list(c); - - if (c.tt != T_RBRA) { - die(c, "expected }"); - } - feed(c); - - return mknode(c, N_FUNC, a, b); -} - -// decl := enum_decl -// | struct_decl -// | func -parse_decl(c: *compiler): *node { - var n: *node; - - n = parse_enum_decl(c); - if (n) { - return n; - } - - n = parse_struct_decl(c); - if (n) { - return n; - } + fdputs(2, "\n"); +} - return parse_func(c); +cdie(c: *compiler, msg: *byte) { + show_context(c); + fdputs(2, "cdie: "); + fdputs(2, msg); + fdputs(2, "\n"); + exit(1); } -// program := decl -// | decl program -parse_program(c: *compiler): *node { - var n: *node; - var e: *node; - var d: *node; +comp_setup(a: *alloc): *compiler { + var c: *compiler; - d = parse_decl(c); - if (!d) { - if (c.tt != T_EOF) { - die(c, "expected eof"); - } - return 0:*node; - } + c = alloc(a, sizeof(*c)): *compiler; - e = mknode1(c, N_PROGRAM, d); - n = e; + c.a = a; - loop { - d = parse_decl(c); - if (!d) { - if (c.tt != T_EOF) { - die(c, "expected eof"); - } + c.fdin = 0; + c.nc = 0; + c.filename = 0:*byte; + c.lineno = 1; + c.colno = 1; + c.tlen = 0; + c.tmax = 4096; + c.token = alloc(c.a, c.tmax); + c.tt = 0; - return n; - } + c.as = setup_assembler(a); - e.b = mknode1(c, N_PROGRAM, d); - e = e.b; - } + c.decls = 0:*decl; + + return c; } compile(c: *compiler, p: *node) { @@ -2287,7 +117,7 @@ compile(c: *compiler, p: *node) { } else if (kind == N_ENUM) { defenum(c, n.a); } else if (kind != N_FUNC && kind != N_FUNCDECL) { - die(c, "invalid decl"); + cdie(c, "invalid decl"); } n = n.b; @@ -2351,7 +181,7 @@ defextern(c: *compiler, n: *node): *decl { d = find(c, name, 0:*byte, 1); if (d.func_defined) { - die(c, "duplicate function"); + cdie(c, "duplicate function"); } d.func_defined = 1; @@ -2375,13 +205,13 @@ defstruct(c: *compiler, n: *node) { name = n.a.s; if (!strcmp(name, "int") || !strcmp(name, "byte") || !strcmp(name, "func")) { - die(c, "reserved word"); + cdie(c, "reserved word"); } d = find(c, name, 0:*byte, 1); if (d.struct_defined) { - die(c, "duplicate struct"); + cdie(c, "duplicate struct"); } d.struct_defined = 1; @@ -2404,7 +234,7 @@ defenum(c: *compiler, n: *node) { d = find(c, name, 0:*byte, 1); if (d.enum_defined) { - die(c, "duplicate enum"); + cdie(c, "duplicate enum"); } d.enum_defined = 1; @@ -2416,26 +246,6 @@ defenum(c: *compiler, n: *node) { } } -type_sizeof(c: *compiler, t: *type): int { - var kind: int; - - kind = t.kind; - if (kind == TY_INT) { - return 8; - } else if (kind == TY_BYTE) { - return 8; - } else if (kind == TY_PTR) { - return 8; - } else if (kind == TY_FUNC) { - return 8; - } else if (kind == TY_STRUCT) { - layout_struct(c, t.st); - return t.st.struct_size; - } else { - die(c, "sizeof: invalid type"); - } -} - layout_struct(c: *compiler, d: *decl) { var m: *node; var offset: int; @@ -2445,7 +255,7 @@ layout_struct(c: *compiler, d: *decl) { if (d.struct_layout_done) { if (d.struct_layout_done == 2) { - die(c, "circular struct definition"); + cdie(c, "circular struct definition"); } return; @@ -2466,7 +276,7 @@ layout_struct(c: *compiler, d: *decl) { md = find(c, d.name, name, 1); if (d.member_defined) { - die(c, "duplicate member"); + cdie(c, "duplicate member"); } md.member_defined = 1; @@ -2506,7 +316,7 @@ compile_func(c: *compiler, d: *decl) { v = find(c, d.name, name, 1); if (v.var_defined) { - die(c, "duplicate argument"); + cdie(c, "duplicate argument"); } v.var_defined = 1; @@ -2522,12 +332,12 @@ compile_func(c: *compiler, d: *decl) { offset = hoist_locals(c, d, d.func_def.b, 0); // Compile the function body - emit_str(c, d.name); - fixup_label(c, d.func_label); - emit_preamble(c, offset, !strcmp(d.name, "_start")); + emit_str(c.as, d.name); + fixup_label(c.as, d.func_label); + emit_preamble(c.as, offset, !strcmp(d.name, "_start")); compile_stmt(c, d, d.func_def.b, 0:*label, 0:*label); - emit_num(c, 0); - emit_ret(c); + emit_num(c.as, 0); + emit_ret(c.as); } hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { @@ -2568,7 +378,7 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { v = find(c, d.name, name, 1); if (v.goto_defined) { - die(c, "duplicate goto"); + cdie(c, "duplicate goto"); } v.goto_defined = 1; @@ -2583,7 +393,7 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { v = find(c, d.name, name, 1); if (v.var_defined) { - die(c, "duplicate variable"); + cdie(c, "duplicate variable"); } v.var_type = t; @@ -2596,52 +406,6 @@ hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { return offset; } -// Unify two types -unify(c: *compiler, a: *type, b: *type) { - var kind: int; - - if (a == b) { - return; - } - - if ((a && !b) || (b && !a) || a.kind != b.kind) { - die(c, "type error"); - } - - kind = a.kind; - if (kind == TY_PTR) { - unify(c, a.val, b.val); - } else if (kind == TY_FUNC) { - unify(c, a.val, b.val); - unify(c, a.arg, b.arg); - } else if (kind == TY_ARG) { - unify(c, a.val, b.val); - unify(c, a.arg, b.arg); - } else if (kind == TY_STRUCT) { - if (a.st != b.st) { - die(c, "type error"); - } - } else if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE) { - die(c, "unify: invalid type"); - } -} - -count_args(c: *compiler, t: *type): int { - var nargs: int; - - nargs = 0; - loop { - if (!t) { - break; - } - - t = t.arg; - nargs = nargs + 1; - } - - return nargs; -} - // Translate an expression compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { var no: *label; @@ -2656,31 +420,31 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { kind = n.kind; if (kind == N_STR) { if (!rhs) { - die(c, "str is not an lexpr"); + cdie(c, "str is not an lexpr"); } - emit_str(c, n.s); + emit_str(c.as, n.s); n.t = mktype1(c, TY_PTR, mktype0(c, TY_BYTE)); } else if (kind == N_NUM) { if (!rhs) { - die(c, "num is not an lexpr"); + cdie(c, "num is not an lexpr"); } - emit_num(c, n.n); + emit_num(c.as, n.n); n.t = mktype0(c, TY_INT); } else if (kind == N_CHAR) { if (!rhs) { - die(c, "char is not an lexpr"); + cdie(c, "char is not an lexpr"); } - emit_num(c, n.n); + emit_num(c.as, n.n); n.t = mktype0(c, TY_INT); } else if (kind == N_EXPRLIST) { if (!rhs) { - die(c, "call is not an lexpr"); + cdie(c, "call is not an lexpr"); } if (n.b) { @@ -2696,7 +460,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { } } else if (kind == N_CALL) { if (!rhs) { - die(c, "call is not an lexpr"); + cdie(c, "call is not an lexpr"); } if (n.b) { @@ -2706,7 +470,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { compile_expr(c, d, n.a, 1); if (n.a.t.kind != TY_FUNC) { - die(c, "calling not a function"); + cdie(c, "calling not a function"); } if (n.b) { @@ -2715,7 +479,7 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { unify(c, n.a.t.arg, 0: *type); } - emit_call(c, count_args(c, n.a.t.arg)); + emit_call(c.as, count_args(c, n.a.t.arg)); n.t = n.a.t.val; } else if (kind == N_DOT) { @@ -2723,61 +487,61 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { if (n.a.t.kind == TY_PTR) { if (n.a.t.val.kind != TY_STRUCT) { - die(c, "dot not a struct"); + cdie(c, "dot not a struct"); } v = find(c, n.a.t.val.st.name, n.b.s, 0); - emit_load(c, n.a.t); + emit_load(c.as, n.a.t); } else { if (n.a.t.kind != TY_STRUCT) { - die(c, "dot not a struct"); + cdie(c, "dot not a struct"); } v = find(c, n.a.t.st.name, n.b.s, 0); } if (!v || !v.member_defined) { - die(c, "no such member"); + cdie(c, "no such member"); } - emit_num(c, v.member_offset); - emit_add(c); + emit_num(c.as, v.member_offset); + emit_add(c.as); n.t = v.member_type; if (rhs) { - emit_load(c, n.t); + emit_load(c.as, n.t); } } else if (kind == N_IDENT) { v = find(c, n.s, 0:*byte, 0); if (v && v.enum_defined) { - emit_num(c, v.enum_value); + emit_num(c.as, v.enum_value); n.t = mktype0(c, TY_INT); return; } v = find(c, d.name, n.s, 0); if (v && v.var_defined) { - emit_lea(c, v.var_offset); + emit_lea(c.as, v.var_offset); n.t = v.var_type; if (rhs) { - emit_load(c, n.t); + emit_load(c.as, n.t); } return; } v = find(c, n.s, 0:*byte, 0); if (v && v.func_defined) { - emit_ptr(c, v.func_label); + emit_ptr(c.as, v.func_label); n.t = v.func_type; return; } - die(c, "no such variable"); + cdie(c, "no such variable"); } else if (kind == N_ASSIGN) { if (!rhs) { - die(c, "assign is not an lexpr"); + cdie(c, "assign is not an lexpr"); } compile_expr(c, d, n.b, 1); @@ -2787,30 +551,30 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { n.t = n.a.t; - emit_store(c, n.t); + emit_store(c.as, n.t); } else if (kind == N_SIZEOF) { if (!rhs) { - die(c, "sizeof is not an lexpr"); + cdie(c, "sizeof is not an lexpr"); } - out = mklabel(c); + out = mklabel(c.as); - emit_jmp(c, out); + emit_jmp(c.as, out); compile_expr(c, d, n.a, 0); - fixup_label(c, out); + fixup_label(c.as, out); if (n.a.t.kind == TY_BYTE) { - emit_num(c, 1); + emit_num(c.as, 1); } else { - emit_num(c, type_sizeof(c, n.a.t)); + emit_num(c.as, type_sizeof(c, n.a.t)); } n.t = mktype0(c, TY_INT); } else if (kind == N_REF) { if (!rhs) { - die(c, "ref is not an lexpr"); + cdie(c, "ref is not an lexpr"); } compile_expr(c, d, n.a, 0); @@ -2820,435 +584,435 @@ compile_expr(c: *compiler, d: *decl, n: *node, rhs: int) { compile_expr(c, d, n.a, 1); if (n.a.t.kind != TY_PTR) { - die(c, "deref not a pointer"); + cdie(c, "deref not a pointer"); } n.t = n.a.t.val; if (rhs) { - emit_load(c, n.t); + emit_load(c.as, n.t); } } else if (kind == N_INDEX) { compile_expr(c, d, n.a, 1); compile_expr(c, d, n.b, 1); if (n.a.t.kind != TY_PTR) { - die(c, "not a pointer"); + cdie(c, "not a pointer"); } - if (!type_isint(c, n.b.t)) { - die(c, "index: not an int"); + if (!type_isint(n.b.t)) { + cdie(c, "index: not an int"); } n.t = n.a.t.val; if (n.t.kind == TY_BYTE) { - emit_num(c, 1); + emit_num(c.as, 1); } else { - emit_num(c, type_sizeof(c, n.t)); + emit_num(c.as, type_sizeof(c, n.t)); } - emit_mul(c); - emit_add(c); + emit_mul(c.as); + emit_add(c.as); if (rhs) { - emit_load(c, n.t); + emit_load(c.as, n.t); } } else if (kind == N_LT) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_lt(c); + emit_lt(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "lt: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "lt: not an int"); } n.t = n.a.t; } else if (kind == N_GT) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_gt(c); + emit_gt(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "gt: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "gt: not an int"); } n.t = n.a.t; } else if (kind == N_LE) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_le(c); + emit_le(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "le: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "le: not an int"); } n.t = n.a.t; } else if (kind == N_GE) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_ge(c); + emit_ge(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "ge: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "ge: not an int"); } n.t = n.a.t; } else if (kind == N_EQ) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_eq(c); + emit_eq(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "eq: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "eq: not an int"); } n.t = n.a.t; } else if (kind == N_NE) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_ne(c); + emit_ne(c.as); unify(c, n.a.t, n.b.t); - if (!type_isprim(c, n.a.t)) { - die(c, "ne: not an int"); + if (!type_isprim(n.a.t)) { + cdie(c, "ne: not an int"); } n.t = n.a.t; } else if (kind == N_BNOT) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } - no = mklabel(c); - out = mklabel(c); + no = mklabel(c.as); + out = mklabel(c.as); compile_expr(c, d, n.a, 1); - emit_jz(c, no); - emit_num(c, 0); - emit_jmp(c, out); - fixup_label(c, no); - emit_num(c, 1); - fixup_label(c, out); + emit_jz(c.as, no); + emit_num(c.as, 0); + emit_jmp(c.as, out); + fixup_label(c.as, no); + emit_num(c.as, 1); + fixup_label(c.as, out); - if (!type_isprim(c, n.a.t)) { - die(c, "not an prim"); + if (!type_isprim(n.a.t)) { + cdie(c, "not an prim"); } n.t = mktype0(c, TY_INT); } else if (kind == N_BOR) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } - no = mklabel(c); - out = mklabel(c); + no = mklabel(c.as); + out = mklabel(c.as); compile_expr(c, d, n.a, 1); - emit_jz(c, no); - emit_num(c, 1); - emit_jmp(c, out); + emit_jz(c.as, no); + emit_num(c.as, 1); + emit_jmp(c.as, out); - fixup_label(c, no); - no = mklabel(c); + fixup_label(c.as, no); + no = mklabel(c.as); compile_expr(c, d, n.b, 1); - emit_jz(c, no); - emit_num(c, 1); - emit_jmp(c, out); + emit_jz(c.as, no); + emit_num(c.as, 1); + emit_jmp(c.as, out); - fixup_label(c, no); - emit_num(c, 0); + fixup_label(c.as, no); + emit_num(c.as, 0); - fixup_label(c, out); + fixup_label(c.as, out); - if (!type_isprim(c, n.a.t)) { - die(c, "not an prim"); + if (!type_isprim(n.a.t)) { + cdie(c, "not an prim"); } - if (!type_isprim(c, n.b.t)) { - die(c, "not an prim"); + if (!type_isprim(n.b.t)) { + cdie(c, "not an prim"); } n.t = mktype0(c, TY_INT); } else if (kind == N_BAND) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } - no = mklabel(c); - out = mklabel(c); + no = mklabel(c.as); + out = mklabel(c.as); compile_expr(c, d, n.a, 1); - emit_jz(c, no); + emit_jz(c.as, no); compile_expr(c, d, n.b, 1); - emit_jz(c, no); + emit_jz(c.as, no); - emit_num(c, 1); - emit_jmp(c, out); + emit_num(c.as, 1); + emit_jmp(c.as, out); - fixup_label(c, no); - emit_num(c, 0); + fixup_label(c.as, no); + emit_num(c.as, 0); - fixup_label(c, out); + fixup_label(c.as, out); - if (!type_isprim(c, n.a.t)) { - die(c, "not an prim"); + if (!type_isprim(n.a.t)) { + cdie(c, "not an prim"); } - if (!type_isprim(c, n.b.t)) { - die(c, "not an prim"); + if (!type_isprim(n.b.t)) { + cdie(c, "not an prim"); } n.t = mktype0(c, TY_INT); } else if (kind == N_POS) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.a, 1); - if (!type_isint(c, n.a.t)) { - die(c, "pos: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "pos: not an int"); } n.t = n.a.t; } else if (kind == N_NEG) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.a, 1); - emit_neg(c); + emit_neg(c.as); - if (!type_isint(c, n.a.t)) { - die(c, "neg: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "neg: not an int"); } n.t = n.a.t; } else if (kind == N_NOT) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.a, 1); - emit_not(c); + emit_not(c.as); - if (!type_isint(c, n.a.t)) { - die(c, "not: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "not: not an int"); } n.t = n.a.t; } else if (kind == N_ADD) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_add(c); + emit_add(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "add: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "add: not an int"); } n.t = n.a.t; } else if (kind == N_SUB) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_sub(c); + emit_sub(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "sub: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "sub: not an int"); } n.t = n.a.t; } else if (kind == N_MUL) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_mul(c); + emit_mul(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "mul: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "mul: not an int"); } n.t = n.a.t; } else if (kind == N_DIV) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_div(c); + emit_div(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "div: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "div: not an int"); } n.t = n.a.t; } else if (kind == N_MOD) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_mod(c); + emit_mod(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "mod: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "mod: not an int"); } n.t = n.a.t; } else if (kind == N_LSH) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_lsh(c); + emit_lsh(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "lsh: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "lsh: not an int"); } n.t = n.a.t; } else if (kind == N_RSH) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_rsh(c); + emit_rsh(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "rsh: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "rsh: not an int"); } n.t = n.a.t; } else if (kind == N_AND) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_and(c); + emit_and(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "and: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "and: not an int"); } n.t = n.a.t; } else if (kind == N_OR) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_or(c); + emit_or(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "or: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "or: not an int"); } n.t = n.a.t; } else if (kind == N_XOR) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.b, 1); compile_expr(c, d, n.a, 1); - emit_xor(c); + emit_xor(c.as); unify(c, n.a.t, n.b.t); - if (!type_isint(c, n.a.t)) { - die(c, "xor: not an int"); + if (!type_isint(n.a.t)) { + cdie(c, "xor: not an int"); } n.t = n.a.t; } else if (kind == N_CAST) { if (!rhs) { - die(c, "not lexpr"); + cdie(c, "not lexpr"); } compile_expr(c, d, n.a, 1); - if (!type_isprim(c, n.a.t)) { - die(c, "not a primitive"); + if (!type_isprim(n.a.t)) { + cdie(c, "not a primitive"); } n.t = prototype(c, n.b); } else { - die(c, "not an expression"); + cdie(c, "not an expression"); } } @@ -3269,30 +1033,30 @@ compile_stmt(c: *compiler, d: *decl, n: *node, top: *label, out: *label) { kind = n.kind; if (kind == N_CONDLIST) { - ifout = mklabel(c); + ifout = mklabel(c.as); no = 0: *label; loop { if (no) { - fixup_label(c, no); + fixup_label(c.as, no); } if (!n) { break; } - no = mklabel(c); + no = mklabel(c.as); if (n.a.a) { compile_expr(c, d, n.a.a, 1); - emit_jz(c, no); + emit_jz(c.as, no); } compile_stmt(c, d, n.a.b, top, out); - emit_jmp(c, ifout); + emit_jmp(c.as, ifout); n = n.b; } - fixup_label(c, ifout); + fixup_label(c.as, ifout); } else if (kind == N_STMTLIST) { loop { if (!n) { @@ -3302,48 +1066,48 @@ compile_stmt(c: *compiler, d: *decl, n: *node, top: *label, out: *label) { n = n.b; } } else if (kind == N_LOOP) { - top = mklabel(c); - out = mklabel(c); - fixup_label(c, top); + top = mklabel(c.as); + out = mklabel(c.as); + fixup_label(c.as, top); compile_stmt(c, d, n.a, top, out); - emit_jmp(c, top); - fixup_label(c, out); + emit_jmp(c.as, top); + fixup_label(c.as, out); } else if (kind == N_BREAK) { if (!out) { - die(c, "break outside loop"); + cdie(c, "break outside loop"); } - emit_jmp(c, out); + emit_jmp(c.as, out); } else if (kind == N_CONTINUE) { if (!top) { - die(c, "continue outside loop"); + cdie(c, "continue outside loop"); } - emit_jmp(c, top); + emit_jmp(c.as, top); } else if (kind == N_RETURN) { if (n.a) { if (d.func_type.val.kind == TY_VOID) { - die(c, "returning a value in a void function"); + cdie(c, "returning a value in a void function"); } compile_expr(c, d, n.a, 1); unify(c, n.a.t, d.func_type.val); } else { if (d.func_type.val.kind != TY_VOID) { - die(c, "returning void in a non void function"); + cdie(c, "returning void in a non void function"); } - emit_num(c, 0); + emit_num(c.as, 0); } - emit_ret(c); + emit_ret(c.as); } else if (kind == N_LABEL) { v = find(c, d.name, n.a.s, 0); - fixup_label(c, v.goto_label); + fixup_label(c.as, v.goto_label); } else if (kind == N_GOTO) { v = find(c, d.name, n.a.s, 0); if (!v || !v.goto_defined) { - die(c, "label not defined"); + cdie(c, "label not defined"); } - emit_jmp(c, v.goto_label); + emit_jmp(c.as, v.goto_label); } else if (kind != N_VARDECL) { compile_expr(c, d, n, 1); - emit_pop(c, 1); + emit_pop(c.as, 1); } } @@ -3391,7 +1155,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl { return 0:*decl; } - d = alloc(c, sizeof(*d)): *decl; + d = alloc(c.a, sizeof(*d)): *decl; d.name = name; d.member_name = member_name; @@ -3402,7 +1166,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl { d.func_defined = 0; d.func_type = 0:*type; - d.func_label = mklabel(c); + d.func_label = mklabel(c.as); d.func_def = 0:*node; d.struct_defined = 0; @@ -3425,7 +1189,7 @@ find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl { d.var_def = 0:*node; d.goto_defined = 0; - d.goto_label = mklabel(c); + d.goto_label = mklabel(c.as); *link = d; @@ -3480,1035 +1244,16 @@ next_decl(c: *compiler, d: *decl): *decl { } } -mktype(c: *compiler, kind: int, a: *type, b: *type, st: *decl): *type { - var t: *type; - - t = alloc(c, sizeof(*t)):*type; - - t.kind = kind; - t.st = st; - t.val = a; - t.arg = b; - - return t; -} - -mktype_struct(c: *compiler, st: *decl): *type { - return mktype(c, TY_STRUCT, 0:*type, 0:*type, st); -} - -mktype0(c: *compiler, kind: int): *type { - return mktype(c, kind, 0:*type, 0:*type, 0:*decl); -} - -mktype1(c: *compiler, kind: int, a: *type): *type { - return mktype(c, kind, a, 0:*type, 0:*decl); -} - -mktype2(c: *compiler, kind: int, a: *type, b: *type): *type { - return mktype(c, kind, a, b, 0:*decl); -} - -prototype(c: *compiler, n: *node): *type { - var a: *type; - var b: *type; - var st: *decl; - var kind: int; - - if (!n) { - return 0:*type; - } - - c.lineno = n.lineno; - c.colno = 0; - - kind = n.kind; - if (kind == N_IDENT) { - if (!strcmp(n.s, "void")) { - return mktype0(c, TY_VOID); - } - - if (!strcmp(n.s, "int")) { - return mktype0(c, TY_INT); - } - - if (!strcmp(n.s, "byte")) { - return mktype0(c, TY_BYTE); - } - - st = find(c, n.s, 0:*byte, 0); - if (!st || !st.struct_defined) { - die(c, "unknown struct"); - } - - return mktype_struct(c, st); - } else if (kind == N_ARGLIST) { - a = prototype(c, n.a.b); - b = prototype(c, n.b); - - kind = a.kind; - if (kind != TY_INT && kind != TY_BYTE - && kind != TY_PTR && kind != TY_FUNC) { - die(c, "not a ptr arg"); - } - - return mktype2(c, TY_ARG, a, b); - } else if (kind == N_FUNCTYPE) { - if (n.b) { - a = prototype(c, n.b); - } else{ - a = mktype0(c, TY_VOID); - } - - b = prototype(c, n.a); - - kind = a.kind; - if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE - && kind != TY_PTR && kind != TY_FUNC) { - die(c, "not a ptr return"); - } - - return mktype2(c, TY_FUNC, a, b); - } else if (kind == N_PTRTYPE) { - return mktype1(c, TY_PTR, prototype(c, n.a)); - } else { - die(c, "prototype: invalid type"); - } -} - -type_isint(c: *compiler, t: *type): int { - return t.kind == TY_INT || t.kind == TY_BYTE; -} - -type_isprim(c: *compiler, t: *type): int { - return t.kind != TY_VOID && t.kind != TY_STRUCT; -} - -// Create a new label -mklabel(c: *compiler): *label { - var l: *label; - - l = alloc(c, sizeof(*l)):*label; - - l.fix = 0:*fixup; - l.at = 0; - l.fixed = 0; - - return l; -} - -// Reserve size in the output buffer -reserve(c: *compiler, n: int) { - var m: *byte; - var b: *chunk; - - if (c.text_end && c.text_end.cap - c.text_end.fill >= n) { - return; - } - - if (n < 4096) { - n = 4096; - } - - m = alloc(c, n); - b = alloc(c, sizeof(*b)):*chunk; - - b.buf = m; - b.fill = 0; - b.cap = n; - b.next = 0:*chunk; - - if (c.text_end) { - c.text_end.next = b; - c.text_end = b; - } else { - c.text = b; - c.text_end = b; - } -} - -// Add a single byte to the output -emit(c: *compiler, x: int) { - reserve(c, 1); - c.text_end.buf[c.text_end.fill] = x:byte; - c.text_end.fill = c.text_end.fill + 1; - c.at = c.at + 1; -} - -// Fix a single reference -fixup(c: *compiler, here: *byte, delta: int) { - here[0] = delta: byte; - here[1] = (delta >> 8): byte; - here[2] = (delta >> 16): byte; - here[3] = (delta >> 24): byte; -} - -// Add an new fixup for the current position -addfixup(c: *compiler, l: *label) { - var f: *fixup; - var here: *byte; - - reserve(c, 4); - - here = &c.text_end.buf[c.text_end.fill]; - - emit(c, 0); - emit(c, 0); - emit(c, 0); - emit(c, 0); - - if (l.fixed) { - fixup(c, here, l.at - c.at); - } else { - f = alloc(c, sizeof(*f)): *fixup; - - f.next = l.fix; - f.ptr = here; - f.at = c.at; - - l.fix = f; - } -} - -// Fix references to a label to the current position -fixup_label(c: *compiler, l: *label) { - var f: *fixup; - - if (l.fixed) { - die(c, "already fixed"); - } - - l.at = c.at; - l.fixed = 1; - - f = l.fix; - loop { - if (!f) { - break; - } - fixup(c, f.ptr, l.at - f.at); - f = f.next; - } -} - -emit_ptr(c: *compiler, l: *label) { - // lea %rax, [l] - emit(c, 0x48); - emit(c, 0x8d); - emit(c, 0x05); - addfixup(c, l); - // push %rax - emit(c, 0x50); -} - -emit_jmp(c: *compiler, l: *label) { - // jmp l - emit(c, 0xe9); - addfixup(c, l); -} - -emit_num(c: *compiler, x: int) { - // push x - emit(c, 0x68); - emit(c, x); - emit(c, x >> 8); - emit(c, x >> 16); - emit(c, x >> 24); -} - -emit_str(c: *compiler, s: *byte) { - var a: *label; - var b: *label; - var i: int; - - a = mklabel(c); - b = mklabel(c); - - // jmp b - emit_jmp(c, b); - - // a: - fixup_label(c, a); - - i = 0; - // .string s - loop { - if (!s[i]) { - break; - } - emit(c, s[i]:int); - i = i + 1; - } - emit(c, 0); - - // nop sled - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - emit(c, 0x90); - - // b: - fixup_label(c, b); - // push a - emit_ptr(c, a); -} - -emit_pop(c: *compiler, n: int) { - n = n * 8; - // add rsp, 8*n - emit(c, 0x48); - emit(c, 0x81); - emit(c, 0xc4); - emit(c, n); - emit(c, n >> 8); - emit(c, n >> 16); - emit(c, n >> 24); -} - -emit_preamble(c: *compiler, n: int, start: int) { - var i: int; - if (start) { - // xor rbp, rbp - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xed); - // mov rdi, [rsp] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x3c); - emit(c, 0x24); - // lea rsi, [rsp + 8] - emit(c, 0x48); - emit(c, 0x8d); - emit(c, 0x74); - emit(c, 0x24); - emit(c, 0x08); - // lea rdx, [rsi + rdi * 8 + 8] - emit(c, 0x48); - emit(c, 0x8d); - emit(c, 0x54); - emit(c, 0xfe); - emit(c, 0x08); - // push rdx - emit(c, 0x52); - // push rsi - emit(c, 0x56); - // push rdi - emit(c, 0x57); - // push rbp - emit(c, 0x55); - } - // push rbp - emit(c, 0x55); - // mov rbp, rsp - emit(c, 0x48); - emit(c, 0x89); - emit(c, 0xe5); - i = 0; - loop { - if (i >= n) { - break; - } - emit_num(c, 0); - i = i + 8; - } -} - -emit_store(c: *compiler, t: *type) { - // pop rdi - emit(c, 0x5f); - // pop rax - emit(c, 0x58); - if (t.kind == TY_BYTE) { - // mov [rdi], al - emit(c, 0x88); - emit(c, 0x07); - } else if (type_isprim(c, t)) { - // mov [rdi], rax - emit(c, 0x48); - emit(c, 0x89); - emit(c, 0x07); - } else { - die(c, "invalid store"); - } - // push rax - emit(c, 0x50); -} - -emit_load(c: *compiler, t: *type) { - // pop rdi - emit(c, 0x5f); - if (t.kind == TY_BYTE) { - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // mov al, [rdi] - emit(c, 0x8a); - emit(c, 0x07); - } else if (type_isprim(c, t)) { - // mov rax, [rdi] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x07); - } else { - die(c, "invalid load"); - } - // push rax - emit(c, 0x50); -} - -emit_jz(c: *compiler, l: *label) { - // pop rax - emit(c, 0x58); - // test rax, rax - emit(c, 0x48); - emit(c, 0x85); - emit(c, 0xc0); - // jz no - emit(c, 0x0f); - emit(c, 0x84); - addfixup(c, l); -} - -emit_lea(c: *compiler, offset: int) { - // lea rax, [rbp + offset] - emit(c, 0x48); - emit(c, 0x8d); - emit(c, 0x85); - emit(c, offset); - emit(c, offset >> 8); - emit(c, offset >> 16); - emit(c, offset >> 24); - // push rax - emit(c, 0x50); -} - -emit_and(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rdx - emit(c, 0x5a); - // and rdx, rax - emit(c, 0x48); - emit(c, 0x21); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_or(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rdx - emit(c, 0x5a); - // or rdx, rax - emit(c, 0x48); - emit(c, 0x09); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_xor(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rdx - emit(c, 0x5a); - // xor rdx, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_add(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rdx - emit(c, 0x5a); - // add rdx, rax - emit(c, 0x48); - emit(c, 0x01); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_ret(c: *compiler) { - // pop rax - emit(c, 0x58); - // mov rsp, rbp - emit(c, 0x48); - emit(c, 0x89); - emit(c, 0xec); - // pop rbp - emit(c, 0x5d); - // ret - emit(c, 0xc3); -} - -emit_call(c: *compiler, n: int) { - // pop rax - emit(c, 0x58); - // call rax - emit(c, 0xff); - emit(c, 0xd0); - // add rsp, 8*(n+1) - emit_pop(c, n); - // push rax - emit(c, 0x50); -} - -emit_gt(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // setg al - emit(c, 0x0f); - emit(c, 0x9f); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_lt(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // setl al - emit(c, 0x0f); - emit(c, 0x9c); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_ge(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // setge al - emit(c, 0x0f); - emit(c, 0x9d); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_le(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // setle al - emit(c, 0x0f); - emit(c, 0x9e); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_eq(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // sete al - emit(c, 0x0f); - emit(c, 0x94); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_ne(c: *compiler) { - // pop rdx - emit(c, 0x5a); - // pop rcx - emit(c, 0x59); - // xor rax, rax - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xc0); - // cmp rdx, rcx - emit(c, 0x48); - emit(c, 0x39); - emit(c, 0xca); - // setne al - emit(c, 0x0f); - emit(c, 0x95); - emit(c, 0xc0); - // mov rax - emit(c, 0x50); -} - -emit_sub(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rdx - emit(c, 0x5a); - // add rax, rdx - emit(c, 0x48); - emit(c, 0x29); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_mul(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rcx - emit(c, 0x59); - // mul rcx - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xe1); - // push rax - emit(c, 0x50); -} - -emit_div(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rcx - emit(c, 0x59); - // xor rdx, rdx - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xd2); - // test rax, rax - emit(c, 0x48); - emit(c, 0x85); - emit(c, 0xc0); - // sets dl - emit(c, 0x0f); - emit(c, 0x98); - emit(c, 0xc2); - // neg rdx - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xda); - // idiv rcx - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xf9); - // push rax - emit(c, 0x50); -} - -emit_mod(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rcx - emit(c, 0x59); - // xor rdx, rdx - emit(c, 0x48); - emit(c, 0x31); - emit(c, 0xd2); - // test rax, rax - emit(c, 0x48); - emit(c, 0x85); - emit(c, 0xc0); - // sets dl - emit(c, 0x0f); - emit(c, 0x98); - emit(c, 0xc2); - // neg rdx - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xda); - // idiv rcx - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xf9); - // push rdx - emit(c, 0x52); -} - -emit_lsh(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rcx - emit(c, 0x59); - // shl rax, cl - emit(c, 0x48); - emit(c, 0xd3); - emit(c, 0xe0); - // push rax - emit(c, 0x50); -} - -emit_rsh(c: *compiler) { - // pop rax - emit(c, 0x58); - // pop rcx - emit(c, 0x59); - // shr rax, cl - emit(c, 0x48); - emit(c, 0xd3); - emit(c, 0xe8); - // push rax - emit(c, 0x50); -} - -emit_not(c: *compiler) { - // pop rax - emit(c, 0x58); - // neg rax - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xd0); - // push rax - emit(c, 0x50); -} - -emit_neg(c: *compiler) { - // pop rax - emit(c, 0x58); - // neg rax - emit(c, 0x48); - emit(c, 0xf7); - emit(c, 0xd8); - // push rax - emit(c, 0x50); -} - -gen_builtins(c: *compiler) { - var d: *decl; - - d = find(c, "syscall", 0:*byte, 1); - if (d.func_defined && !d.func_label.fixed) { - d.func_defined = 1; - fixup_label(c, d.func_label); - // push rbp - emit(c, 0x55); - // mov rbp, rsp - emit(c, 0x48); - emit(c, 0x89); - emit(c, 0xe5); - // mov rax, [rbp + 16] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x45); - emit(c, 0x10); - // mov rdi, [rbp + 24] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x7d); - emit(c, 0x18); - // mov rsi, [rbp + 32] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x75); - emit(c, 0x20); - // mov rdx, [rbp + 40] - emit(c, 0x48); - emit(c, 0x8b); - emit(c, 0x55); - emit(c, 0x28); - // mov r10, [rbp + 48] - emit(c, 0x4c); - emit(c, 0x8b); - emit(c, 0x55); - emit(c, 0x30); - // mov r8, [rbp + 56] - emit(c, 0x4c); - emit(c, 0x8b); - emit(c, 0x45); - emit(c, 0x38); - // mov r9, [rbp + 64] - emit(c, 0x4c); - emit(c, 0x8b); - emit(c, 0x4d); - emit(c, 0x40); - // syscall - emit(c, 0x0f); - emit(c, 0x05); - // pop rbp - emit(c, 0x5d); - // ret - emit(c, 0xc3); - } -} - -writeout(c: *compiler) { - var b: *chunk; - var i: int; - var text_size: int; - var load_addr: int; - var entry: int; - var d: *decl; - - load_addr = 0x100000; - text_size = c.at; - - d = find(c, "_start", 0:*byte, 0); - if (!d || !d.func_defined || !d.func_label.fixed) { - die(c, "_start is not defined"); - } - - entry = load_addr + d.func_label.at + 128; - text_size = text_size + 128; - - // magic - putchar(c, 0x7f); - putchar(c, 'E'); - putchar(c, 'L'); - putchar(c, 'F'); - - // class - putchar(c, 2); - - // endian - putchar(c, 1); - - // version - putchar(c, 1); - - // abi - putchar(c, 0); - - // abi version - putchar(c, 0); - - // padding - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // type - putchar(c, 2); - putchar(c, 0); - - // machine - putchar(c, 62); - putchar(c, 0); - - // version - putchar(c, 1); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // entry point - putchar(c, entry); - putchar(c, entry >> 8); - putchar(c, entry >> 16); - putchar(c, entry >> 24); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phoff - putchar(c, 64); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // shoff - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // flags - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // ehsize - putchar(c, 64); - putchar(c, 0); - - // phentsize - putchar(c, 56); - putchar(c, 0); - - // phnum - putchar(c, 1); - putchar(c, 0); - - // shentsize - putchar(c, 64); - putchar(c, 0); - - // shnum - putchar(c, 0); - putchar(c, 0); - - // shstrndx - putchar(c, 0); - putchar(c, 0); - - // phdr[0].type - putchar(c, 1); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].flags - putchar(c, 5); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].offset - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].vaddr - putchar(c, 0); - putchar(c, 0); - putchar(c, 0x10); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].paddr - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].filesize - putchar(c, text_size); - putchar(c, text_size >> 8); - putchar(c, text_size >> 16); - putchar(c, text_size >> 24); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].memsize - putchar(c, text_size); - putchar(c, text_size >> 8); - putchar(c, text_size >> 16); - putchar(c, text_size >> 24); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // phdr[0].align - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - putchar(c, 0); - - // nop sled - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - putchar(c, 0x90); - - b = c.text; - loop { - if (!b) { - break; - } - i = 0; - loop { - if (i >= b.fill) { - break; - } - putchar(c, b.buf[i]: int); - i = i + 1; - } - b = b.next; - } -} - main(argc: int, argv: **byte, envp: **byte) { - var c: compiler; + var a: alloc; + var c: *compiler; var p: *node; + var d: *decl; var i: int; - comp_setup(&c); + setup_alloc(&a); + + c = comp_setup(&a); i = 1; loop { @@ -4519,25 +1264,42 @@ main(argc: int, argv: **byte, envp: **byte) { if (!strcmp(argv[i], "-o")) { i = i + 1; if (i >= argc) { - die(&c, "invalid -o at end of argument list"); + die("invalid -o at end of argument list"); } - open_output(&c, argv[i]); + open_output(c.as, argv[i]); i = i + 1; continue; } - open_source(&c, argv[i]); - p = parse_program(&c); - close_source(&c); - compile(&c, p); + if (argv[i][0] == '-':byte) { + die("invalid argument"); + } + + open_source(c, argv[i]); + p = parse_program(c, p); + close_source(c); i = i + 1; } - if (c.fdout == 1) { - open_output(&c, "a.out"); + compile(c, p); + + if (c.as.fdout == 1) { + open_output(c.as, "a.out"); + } + + d = find(c, "syscall", 0:*byte, 1); + if (d.func_defined && !d.func_label.fixed) { + fixup_label(c.as, d.func_label); + emit_preamble(c.as, 0, 0); + emit_syscall(c.as); + emit_ret(c.as); + } + + d = find(c, "_start", 0:*byte, 0); + if (!d || !d.func_defined) { + die("no _start"); } - gen_builtins(&c); - writeout(&c); + writeout(c.as, d.func_label); } diff --git a/cc3.y b/cc3.y @@ -0,0 +1,152 @@ +start program; + +primary: ident + | num + | hex + | str + | chr + | lpar expr rpar +; + +expr_list: expr + | expr comma expr_list +; + +post_expr: primary + | sizeof lpar expr rpar + | post_expr lsq expr rsq + | post_expr lpar expr_list rpar + | post_expr dot ident + | post_expr colon type +; + +unary_expr : post_expr + | and unary_expr + | star unary_expr + | plus unary_expr + | minus unary_expr + | tilde unary_expr + | bang unary_expr +; + +shift_expr : unary_expr + | unary_expr lsh shift_expr + | unary_expr rsh shift_expr +; + +mul_expr : shift_expr + | shift_expr star mul_expr + | shift_expr div mul_expr + | shift_expr mod mul_expr + | shift_expr and mul_expr +; + +add_expr : mul_expr + | mul_expr plus add_expr + | mul_expr minus add_expr + | mul_expr or add_expr + | mul_expr xor add_expr +; + +comp_expr : add_expr + | add_expr lt add_expr + | add_expr gt add_expr + | add_expr le add_expr + | add_expr gt add_expr + | add_expr eq add_expr + | add_expr ne add_expr +; + +bool_expr : bool_expr + | add_expr and_then bool_expr + | add_expr or_else bool_expr +; + +expr : bool_expr + | bool_expr equal expr +; + +if_stmt : if expr lbra stmt_list rbra + | if expr lbra stmt_list rbra else if_stmt + | if expr lbra stmt_list rbra else lbra stmt_list rbra +; + +loop_stmt : loop lbra stmt_list rbra; + +break_stmt : break; + +continue_stmt : continue; + +return_stmt : return; + | return expr +; + +var_decl : ident colon type; + +var_stmt : var var_decl; + +label_stmt : colon ident + +goto_stmt : goto ident; + +stmt : if_stmt + | loop_stmt + | break_stmt semi + | continue_stmt semi + | return_stmt semi + | var_stmt semi + | label_stmt semi + | goto_stmt semi + | expr semi +; + +stmt_list : stmt + | stmt stmt_list +; + +enum_list : ident + | enum_list comma enum_list +; + +enum_decl : enum ident lbra enum_list rbra; + +type : ident + | star type + | lpar type rpar + | func func_type +; + +member_decl : ident colon type; + +member_list : member_decl + | member_decl semi member_list +; + +struct_decl : struct ident lbra member_list rbra; + +arg_decl : colon type + ident colon type +; + +arg_list : arg_decl + | arg_decl comma arg_list +; + +func_type : lpar rpar colon type + | lpar arg_list rpar colon type +; + +func_decl : ident func_type; + +func_def : func_decl lbra stmt_list rbra + | func_decl semi +; + +decl : enum_decl + | struct_decl + | func_def +; + +program : decl + | decl program +; diff --git a/genlex.c b/genlex.c @@ -0,0 +1,1115 @@ +getchar(): int { + var b: byte; + var ret: int; + ret = read(0, &b, 1); + if (ret < 0) { + exit(3); + } + if (ret == 0) { + return -1; + } + return b: int; +} + +putchar(ch: int): void { + var b: byte; + var ret: int; + b = ch: byte; + ret = write(1, &b, 1); + if (ret != 1) { + exit(3); + } +} + +struct compiler { + a: alloc; + nc: int; + lineno: int; + colno: int; + tt: int; + n: *nfa; + buf: *byte; + tlen: int; + tmax: int; + tags: *tag; + ntags: int; + nnfa: int; + ndfa: int; + d: *dfa; +} + +setup(c: *compiler): void { + setup_alloc(&c.a); + c.nc = getchar(); + c.lineno = 1; + c.colno = 1; + c.tt = 0; + c.n = 0: *nfa; + c.tmax = 256; + c.tlen = 0; + c.buf = alloc(&c.a, c.tmax); + c.tags = 0: *tag; + c.ntags = 0; + c.nnfa = 0; + c.ndfa = 0; + c.d = 0:*dfa; + feed(c); +} + +feedc(c: *compiler): void { + c.nc = getchar(); + if (c.nc == '\n') { + c.lineno = c.lineno + 1; + c.colno = 0; + } + c.colno = c.colno +1; +} + +enum { + T_EOF, + T_IDENT, + T_LITERAL, + T_CHARSET, + T_DOT, + T_STAR, + T_PLUS, + T_QMARK, + T_LPAR, + T_RPAR, + T_ALT, + T_EQUALS, + T_SEMI, +} + +feed(c: *compiler): void { + c.n = 0:*nfa; + c.tlen = 0; + + loop { + if (c.nc == -1) { + c.tt = T_EOF; + return; + } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r' || c.nc == '\n') { + feedc(c); + } else if (c.nc == '/') { + feedc(c); + if (c.nc == '/') { + loop { + if (c.nc == '\n' || c.nc == -1) { + break; + } + feedc(c); + } + } else { + die("slash but a comment"); + } + } else { + break; + } + } + + if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') { + feed_ident(c); + } else if (c.nc == '"') { + feed_literal(c); + } else if (c.nc == '[') { + feed_charset(c); + } else if (c.nc == '.') { + c.tt = T_DOT; + feedc(c); + } else if (c.nc == '*') { + c.tt = T_STAR; + feedc(c); + } else if (c.nc == '+') { + c.tt = T_PLUS; + feedc(c); + } else if (c.nc == '?') { + c.tt = T_QMARK; + feedc(c); + } else if (c.nc == '(') { + c.tt = T_LPAR; + feedc(c); + } else if (c.nc == ')') { + c.tt = T_RPAR; + feedc(c); + } else if (c.nc == '|') { + c.tt = T_ALT; + feedc(c); + } else if (c.nc == '=') { + c.tt = T_EQUALS; + feedc(c); + } else if (c.nc == ';') { + c.tt = T_SEMI; + feedc(c); + } else { + die("invalid char"); + } +} + +feed_ident(c: *compiler): void { + c.tt = T_IDENT; + loop { + if (!((c.nc >= 'a' && c.nc <= 'z') || + (c.nc >= 'A' && c.nc <= 'Z') || + (c.nc >= '0' && c.nc <= '9') || + c.nc == '_')) { + break; + } + + c.buf[c.tlen] = c.nc:byte; + c.tlen = c.tlen + 1; + if (c.tlen == c.tmax) { + die("ident too long"); + } + c.buf[c.tlen] = 0:byte; + + feedc(c); + } +} + +hexdig(c: *compiler): int { + if (c.nc >= '0' && c.nc <= '9') { + return c.nc - '0'; + } + + if (c.nc >= 'A' && c.nc <= 'F') { + return (c.nc - 'A') + 10; + } + + if (c.nc >= 'a' && c.nc <= 'f') { + return (c.nc - 'a') + 10; + } + + die("invalid hex digit"); +} + +feed_escape(c: *compiler): void { + var hex: int; + + feedc(c); + + if (c.nc == 't') { + c.nc = '\t'; + } else if (c.nc == 'r') { + c.nc = '\r'; + } else if (c.nc == 'n') { + c.nc = '\n'; + } else if (c.nc == '\\') { + c.nc = '\\'; + } else if (c.nc == '\'') { + c.nc = '\''; + } else if (c.nc == '\"') { + c.nc = '\"'; + } else if (c.nc == '-') { + c.nc = '-'; + } else if (c.nc == '[') { + c.nc = '['; + } else if (c.nc == ']') { + c.nc = ']'; + } else if (c.nc == 'x') { + feedc(c); + hex = hexdig(c) * 16; + feedc(c); + hex = hex + hexdig(c); + c.nc = hex; + } else { + die("invalid escape"); + } +} + +feed_literal(c: *compiler): void { + var a: *nfa; + + c.tt = T_LITERAL; + c.n = nfa_empty(c); + + feedc(c); + + loop { + if (c.nc == '"') { + feedc(c); + return; + } + + if (c.nc == -1 || c.nc == 0 || c.nc == '\n') { + die("invalid char in literal"); + } + + if (c.nc == '\\') { + feed_escape(c); + } + + a = nfa_literal(c, c.nc: byte); + c.n = nfa_concat(c, c.n, a); + + feedc(c); + } +} + +feed_charset(c: *compiler): void { + var left: int; + var right: int; + var mode: int; + var nonempty: int; + var i: int; + var a: *nfa; + + c.tt = T_CHARSET; + + feedc(c); + + mode = 1; + nonempty = 0; + + i = left; + loop { + if (i == 256) { + break; + } + c.buf[i] = 0: byte; + i = i + 1; + } + + loop { + if (c.nc == ']') { + feedc(c); + break; + } + + if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') { + die("invalid char in charset"); + } + + if (c.nc == '^') { + if (!mode) { + die("invalid char in charset"); + } + + feedc(c); + if (mode && !nonempty) { + i = 0; + loop { + if (i == 256) { + break; + } + c.buf[i] = 1:byte; + i = i + 1; + } + } + mode = 0; + continue; + } + + if (c.nc == '\\') { + feed_escape(c); + left = c.nc; + feedc(c); + } else { + left = c.nc; + feedc(c); + } + + if (c.nc == '-') { + feedc(c); + + if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') { + die("invalid char in charset"); + } + + if (c.nc == '\\') { + feed_escape(c); + right = c.nc + 1; + feedc(c); + } else { + right = c.nc + 1; + feedc(c); + } + } else { + right = left + 1; + } + + nonempty = 1; + i = left; + loop { + if (i >= right) { + break; + } + c.buf[i] = mode: byte; + i = i + 1; + } + } + + c.n = nfa_empty(c); + c.n.left = 256; + c.n.right = 256; + + i = 0; + loop { + loop { + if (i == 256) { + left = 256; + break; + } + + if (c.buf[i]) { + left = i; + break; + } + + i = i + 1; + } + + loop { + if (i == 256) { + right = 256; + break; + } + + if (!c.buf[i]) { + right = i; + break; + } + + i = i + 1; + } + + if (left >= right) { + break; + } + + a = nfa_empty(c); + a.left = left; + a.right = right; + + c.n = nfa_alt(c, c.n, a); + } +} + +parse_ident(c: *compiler): *tag { + var t: *tag; + if (c.tt != T_IDENT) { + return 0: *tag; + } + t = find_tag(c, c.buf); + feed(c); + return t; +} + +struct tag { + next: *tag; + s: *byte; + id: int; +} + +intern(c: *compiler): *byte { + var s: *byte; + var i: int; + s = alloc(&c.a, c.tlen + 1); + i = 0; + loop { + if (i == c.tlen) { + break; + } + s[i] = c.buf[i]; + i = i + 1; + } + s[c.tlen] = 0:byte; + return s; +} + +find_tag(c: *compiler, s: *byte): *tag { + var t: *tag; + var link: **tag; + link = &c.tags; + loop { + t = *link; + if (!t) { + break; + } + if (!strcmp(t.s, s)) { + return t; + } + link = &t.next; + } + t = alloc(&c.a, sizeof(*t)): *tag; + t.next = 0:*tag; + t.s = intern(c); + t.id = c.ntags; + c.ntags = c.ntags + 1; + *link = t; + return t; +} + +struct nfa { + id: int; + tag: *tag; + left: int; + right: int; + live: int; + a: *nfa; + b: *nfa; + end: *nfa; +} + +nfa_empty(c: *compiler): *nfa { + var n: *nfa; + n = alloc(&c.a, sizeof(*n)):*nfa; + n.id = c.nnfa; + n.left = -1; + n.right = -1; + n.live = 0; + n.a = 0:*nfa; + n.b = 0:*nfa; + n.end = n; + c.nnfa = c.nnfa + 1; + return n; +} + +nfa_literal(c: *compiler, a: byte): *nfa { + var n: *nfa; + n = nfa_empty(c); + n.left = a:int; + n.right = n.left + 1; + return n; +} + +nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa { + var i: *nfa; + var o: *nfa; + i = nfa_empty(c); + o = nfa_empty(c); + i.a = a; + i.b = b; + i.end = o; + a.end.a = o; + b.end.a = o; + return i; +} + +nfa_concat(c: *compiler, a: *nfa, b: *nfa): *nfa { + a.end.a = b; + a.end = b.end; + return a; +} + +nfa_plus(c: *compiler, a: *nfa): *nfa { + var o: *nfa; + o = nfa_empty(c); + o.b = a; + a.end.a = o; + a.end = o; + return a; +} + +nfa_qmark(c: *compiler, a: *nfa): *nfa { + var b: *nfa; + b = nfa_empty(c); + a = nfa_alt(c, a, b); + return a; +} + +nfa_star(c: *compiler, a: *nfa): *nfa { + a = nfa_plus(c, a); + a = nfa_qmark(c, a); + return a; +} + +parse_literal(c: *compiler): *nfa { + var n: *nfa; + if (c.tt != T_LITERAL) { + return 0:*nfa; + } + n = c.n; + feed(c); + return n; +} + +parse_charset(c: *compiler): *nfa { + var n: *nfa; + if (c.tt != T_CHARSET) { + return 0:*nfa; + } + n = c.n; + feed(c); + return n; +} + +parse_dot(c: *compiler): *nfa { + var n: *nfa; + if (c.tt != T_DOT) { + return 0:*nfa; + } + feed(c); + n = nfa_empty(c); + n.left = 0; + n.right = 256; + return n; +} + +// primary := literal +// | charset +// | dot +// | '(' regex ')' +parse_primary(c: *compiler): *nfa { + var n: *nfa; + + n = parse_literal(c); + if (n) { + return n; + } + + n = parse_charset(c); + if (n) { + return n; + } + + n = parse_dot(c); + if (n) { + return n; + } + + if (c.tt == T_LPAR) { + feed(c); + + n = parse_regex(c); + + if (c.tt != T_RPAR) { + die("expected )"); + } + feed(c); + + return n; + } + + return 0: *nfa; +} + +// post := primary +// | post '*' +// | post '+' +// | post '?' +parse_post(c: *compiler): *nfa { + var n: *nfa; + + n = parse_primary(c); + if (!n) { + return 0: *nfa; + } + + loop { + if (c.tt == T_STAR) { + n = nfa_star(c, n); + feed(c); + } else if (c.tt == T_PLUS) { + n = nfa_plus(c, n); + feed(c); + } else if (c.tt == T_QMARK) { + n = nfa_qmark(c, n); + feed(c); + } else { + return n; + } + } +} + +// concat := post +// | post concat +parse_concat(c: *compiler): *nfa { + var n: *nfa; + var b: *nfa; + + n = parse_post(c); + if (!n) { + return 0:*nfa; + } + + loop { + b = parse_post(c); + if (!b) { + return n; + } + + n = nfa_concat(c, n, b); + } +} + +// regex := concat +// | '|' regex +// | concat '|' regex +parse_regex(c: *compiler): *nfa { + var n: *nfa; + var b: *nfa; + + n = parse_concat(c); + if (!n) { + n = nfa_empty(c); + } + + loop { + if (c.tt != T_ALT) { + return n; + } + feed(c); + + b = parse_concat(c); + if (!b) { + b = nfa_empty(c); + } + + n = nfa_alt(c, n, b); + } +} + +// decl := ident '=' regex ';' +parse_decl(c: *compiler): *nfa { + var regex: *nfa; + var t: *tag; + var n: *nfa; + + t = parse_ident(c); + if (!t) { + return 0: *nfa; + } + + if (c.tt != T_EQUALS) { + die("expected ="); + } + feed(c); + + regex = parse_regex(c); + + if (c.tt != T_SEMI) { + die("expected ;"); + } + feed(c); + + n = nfa_empty(c); + n.tag = t; + + return nfa_concat(c, regex, n); +} + +// progam := decl +// | decl program +parse_program(c: *compiler): *nfa { + var n: *nfa; + var p: *nfa; + + p = 0: *nfa; + loop { + n = parse_decl(c); + if (!n) { + if (c.tt != T_EOF) { + die("expected eof"); + } + return p; + } + + if (p) { + p = nfa_alt(c, p, n); + } else { + p = n; + } + } +} + +struct dfa { + id: int; + link: **dfa; + key: nlist; + l: *dfa; + r: *dfa; + seen: int; +} + +struct nlist { + live: **nfa; + fill: int; + cap: int; + tag: *tag; +} + +alloc_nlist(c: *compiler, l: *nlist, cap: int): void { + l.cap = cap; + l.live = alloc(&c.a, sizeof(*l.live) * cap):**nfa; + l.fill = 0; + l.tag = 0:*tag; +} + +activate(l: *nlist, n: *nfa): void { + if (n.live) { + return; + } + + n.live = 1; + + l.live[l.fill] = n; + l.fill = l.fill + 1; + + if (n.left == -1) { + if ((!l.tag) || (n.tag && n.tag.id < l.tag.id)) { + l.tag = n.tag; + } + + if (n.a) { + activate(l, n.a); + } + + if (n.b) { + activate(l, n.b); + } + } +} + +nlist_cmp(a: *nlist, b: *nlist): int { + var i: int; + + i = 0; + loop { + if (i == a.fill && i == b.fill) { + break; + } + + if (i == a.fill) { + return -1; + } + + if (i == b.fill) { + return 1; + } + + if (a.live[i].id > b.live[i].id) { + return 1; + } + + if (a.live[i].id < b.live[i].id) { + return -1; + } + + i = i + 1; + } + + if (!a.tag && !b.tag) { + return 0; + } + + if (!a.tag) { + return -1; + } + + if (!b.tag) { + return 1; + } + + if (a.tag.id < b.tag.id) { + return -1; + } + + if (a.tag.id > b.tag.id) { + return 1; + } + + return 0; +} + +nlist_sort(l: *nlist): void { + var i: int; + var j: int; + var k: int; + var tmp: *nfa; + + if (l.fill < 2) { + return; + } + + i = l.fill - 1; + loop { + j = i; + loop { + k = (j << 1) + 1; + if (k >= l.fill) { + break; + } + if (k + 1 < l.fill && l.live[k].id < l.live[k + 1].id) { + k = k + 1; + } + if (l.live[j].id >= l.live[k].id) { + break; + } + tmp = l.live[k]; + l.live[k] = l.live[j]; + l.live[j] = tmp; + j = k; + } + if (i == 0) { + break; + } + i = i - 1; + } + + i = l.fill - 1; + loop { + if (i == 0) { + break; + } + tmp = l.live[i]; + l.live[i] = l.live[0]; + j = 0; + loop { + k = (j << 1) + 1; + if (k >= i) { + break; + } + if (k + 1 < i && l.live[k].id < l.live[k + 1].id) { + k = k + 1; + } + if (tmp.id >= l.live[k].id) { + break; + } + j = k; + } + l.live[j] = tmp; + i = i - 1; + } +} + +alloc_link(c: *compiler): **dfa { + var link: **dfa; + var i: int; + link = alloc(&c.a, sizeof(*link) * 256): **dfa; + i = 0; + loop { + if (i == 256) { + break; + } + link[i] = 0:*dfa; + i = i + 1; + } + return link; +} + +nlist_copy(c: *compiler, dest: *nlist, src: *nlist): void { + var i: int; + alloc_nlist(c, dest, src.fill); + dest.fill = src.fill; + dest.tag = src.tag; + i = 0; + loop { + if (i >= src.fill) { + break; + } + dest.live[i] = src.live[i]; + i = i + 1; + } +} + +nlist2dfa(c: *compiler, l: *nlist): *dfa { + var link: **dfa; + var d: *dfa; + var n: *nfa; + var t: *tag; + var dir: int; + var i: int; + var j: int; + + if (l.fill == 0 && !l.tag) { + return 0:*dfa; + } + + link = &c.d; + loop { + d = *link; + if (!d) { + break; + } + + dir = nlist_cmp(l, &d.key); + + if (dir < 0) { + link = &d.l; + } else if (dir > 0) { + link = &d.r; + } else { + return d; + } + } + + d = alloc(&c.a, sizeof(*d)): *dfa; + d.id = c.ndfa; + d.link = alloc_link(c); + nlist_copy(c, &d.key, l); + d.l = 0: *dfa; + d.r = 0: *dfa; + d.seen = 0; + + c.ndfa = c.ndfa + 1; + + *link = d; + + i = 0; + loop { + if (i == 256) { + break; + } + + deactivate(l); + j = 0; + loop { + if (j >= d.key.fill) { + break; + } + + n = d.key.live[j]; + if (n.left <= i && i < n.right) { + if (n.a) { + activate(l, n.a); + } + if (n.b) { + activate(l, n.b); + } + } + + j = j + 1; + } + + d.link[i] = nlist2dfa(c, l); + + i = i + 1; + } + + return d; +} + +deactivate(l: *nlist): void { + var i: int; + i = 0; + loop { + if (i >= l.fill) { + l.fill = 0; + l.tag = 0: *tag; + break; + } + l.live[i].live = 0; + i = i + 1; + } +} + +powerset(c: *compiler, n: *nfa): *dfa { + var live: nlist; + alloc_nlist(c, &live, c.nnfa); + activate(&live, n); + return nlist2dfa(c, &live); +} + +codegen(c: *compiler, a: *dfa): void { + var i: int; + var b: *dfa; + var lo: int; + var hi: int; + + if (a.seen) { + return; + } + a.seen = 1; + + fdputs(1, ":_"); + fdputd(1, a.id); + fdputs(1, ";\n"); + + if (a.key.tag) { + fdputs(1, "\tlexmark(l, T_"); + fdputs(1, a.key.tag.s); + fdputs(1, ");\n"); + } + + fdputs(1, "\tch = lexfeedc(l);\n"); + + i = 0; + loop { + loop { + if (i == 256 || a.link[i]) { + lo = i; + break; + } + i = i + 1; + } + + if (lo == 256) { + break; + } + + b = a.link[i]; + + loop { + if (i == 256 || a.link[i] != b) { + hi = i; + break; + } + i = i + 1; + } + + if (lo != (hi - 1)) { + fdputs(1, "\tif (ch >= "); + fdputd(1, lo); + fdputs(1, " && ch <= "); + fdputd(1, hi - 1); + } else { + fdputs(1, "\tif (ch == "); + fdputd(1, lo); + } + fdputs(1, ") { "); + fdputs(1, "goto _"); + fdputd(1, b.id); + fdputs(1, "; }\n"); + + } + + fdputs(1, "\treturn;\n"); + + i = 0; + loop { + if (i == 256) { + break; + } + + if (a.link[i]) { + codegen(c, a.link[i]); + } + + i = i + 1; + } +} + +gen(c: *compiler, a: *dfa): void { + var t: *tag; + t = c.tags; + fdputs(1, "enum {\n"); + fdputs(1, "\tT_INVALID,\n"); + fdputs(1, "\tT_EOF,\n"); + loop { + if (!t) { + break; + } + fdputs(1, "\tT_"); + fdputs(1, t.s); + fdputs(1, ",\n"); + t = t.next; + } + fdputs(1, "}\n"); + fdputs(1, "\n"); + fdputs(1, "lexstep(l: *lex_state): void {\n"); + fdputs(1, "\tvar ch: int;\n"); + fdputs(1, "\tlexmark(l, T_INVALID);\n"); + codegen(c, a); + fdputs(1, "}\n"); +} + +main(argc: int, argv: **byte, envp: **byte): void { + var c: compiler; + var n: *nfa; + var a: *dfa; + setup(&c); + n = parse_program(&c); + a = powerset(&c, n); + gen(&c, a); +} diff --git a/lex.c b/lex.c @@ -1,1289 +0,0 @@ -syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int; - -read(fd: int, buf: *byte, n: int): int { - return syscall(0, fd, buf: int, n, 0, 0, 0); -} - -write(fd: int, buf: *byte, n: int): int { - return syscall(1, fd, buf: int, n, 0, 0, 0); -} - -getchar(): int { - var b: byte; - var ret: int; - ret = read(0, &b, 1); - if (ret < 0) { - exit(3); - } - if (ret == 0) { - return -1; - } - return b: int; -} - -putchar(ch: int): void { - var b: byte; - var ret: int; - b = ch: byte; - ret = write(1, &b, 1); - if (ret != 1) { - exit(3); - } -} - -exit(n: int): void { - syscall(60, n, 0, 0, 0, 0, 0); -} - -strcmp(a: *byte, b: *byte): int { - var i: int; - - i = 0; - - loop { - if (a[i] > b[i]) { - return 1; - } - - if (a[i] < b[i]) { - return -1; - } - - if (a[i] == 0:byte) { - return 0; - } - - i = i + 1; - } -} - -fdputc(fd: int, ch: int): void { - var b: byte; - var ret: int; - b = ch: byte; - ret = write(fd, &b, 1); - if (ret != 1) { - exit(3); - } -} - - -fdputd(fd: int, n: int): void { - var a: int; - - if (n < 0) { - fdputc(fd, '-'); - a = -(n % 10); - n = n / -10; - } else { - a = n % 10; - n = n / 10; - } - - if (n != 0) { - fdputd(fd, n); - } - - fdputc(fd, '0' + a); -} - -fdput(fd: int, msg: *byte): void { - var len: int; - var ret: int; - var off: int; - len = strlen(msg); - off = 0; - loop { - if (off == len) { - break; - } - ret = write(fd, msg, len - off); - if (ret < 0) { - exit(3); - } - off = off + ret; - } -} - -die(msg: *byte): void { - fdput(2, "die: "); - fdput(2, msg); - fdput(2, "\n"); - exit(1); -} - -strlen(s: *byte): int { - var ret: int; - ret = 0; - loop { - if (s[ret] == 0:byte) { - return ret; - } - ret = ret + 1; - } -} - -mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int { - return syscall(9, addr, len, prot, flags, fd, off); -} - -struct page { - ptr: *byte; - fill: int; - size: int; -} - -struct allocator { - page: *page; -} - -setup_alloc(a: *allocator): void { - a.page = 0:*page; -} - -alloc(a: *allocator, size: int): *byte { - var page: *page; - var mret: int; - var ret: *byte; - var psize: int; - - if (size < 0) { - die("invalid alloc"); - } - - if (size >= 2048) { - size = size + 4095; - size = size & ~4095; - mret = mmap(0, size, 3, 0x22, -1, 0); - if (mret == -1) { - die("out of memory"); - } - ret = mret: *byte; - return ret; - } - - page = a.page; - if (page) { - if (size <= page.size - page.fill) { - mret = page.ptr:int + page.fill; - page.fill = page.fill + size; - ret = mret: *byte; - return ret; - } - } - - psize = 64 * 1024; - - mret = mmap(0, psize, 3, 0x22, -1, 0); - if (mret == -1) { - die("out of memory"); - } - - page = mret: *page; - page.ptr = (&page[1]): *byte; - ret = page.ptr; - page.size = psize - sizeof(*page); - page.fill = size; - - a.page = page; - - return ret; -} - -_start(): void { - main(); - exit(0); -} - -struct compiler { - a: allocator; - nc: int; - lineno: int; - colno: int; - tt: int; - n: *nfa; - buf: *byte; - tlen: int; - tmax: int; - tags: *tag; - ntags: int; - nnfa: int; - ndfa: int; - d: *dfa; -} - -setup(c: *compiler): void { - setup_alloc(&c.a); - c.nc = getchar(); - c.lineno = 1; - c.colno = 1; - c.tt = 0; - c.n = 0: *nfa; - c.tmax = 256; - c.tlen = 0; - c.buf = alloc(&c.a, c.tmax); - c.tags = 0: *tag; - c.ntags = 0; - c.nnfa = 0; - c.ndfa = 0; - c.d = 0:*dfa; - feed(c); -} - -feedc(c: *compiler): void { - c.nc = getchar(); - if (c.nc == '\n') { - c.lineno = c.lineno + 1; - c.colno = 0; - } - c.colno = c.colno +1; -} - -enum { - T_EOF, - T_IDENT, - T_LITERAL, - T_CHARSET, - T_DOT, - T_STAR, - T_PLUS, - T_QMARK, - T_LPAR, - T_RPAR, - T_ALT, - T_EQUALS, - T_SEMI, -} - -feed(c: *compiler): void { - c.n = 0:*nfa; - c.tlen = 0; - - loop { - if (c.nc == -1) { - c.tt = T_EOF; - return; - } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r' || c.nc == '\n') { - feedc(c); - } else if (c.nc == '/') { - feedc(c); - if (c.nc == '/') { - loop { - if (c.nc == '\n' || c.nc == -1) { - break; - } - feedc(c); - } - } else { - die("slash but a comment"); - } - } else { - break; - } - } - - if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') { - feed_ident(c); - } else if (c.nc == '"') { - feed_literal(c); - } else if (c.nc == '[') { - feed_charset(c); - } else if (c.nc == '.') { - c.tt = T_DOT; - feedc(c); - } else if (c.nc == '*') { - c.tt = T_STAR; - feedc(c); - } else if (c.nc == '+') { - c.tt = T_PLUS; - feedc(c); - } else if (c.nc == '?') { - c.tt = T_QMARK; - feedc(c); - } else if (c.nc == '(') { - c.tt = T_LPAR; - feedc(c); - } else if (c.nc == ')') { - c.tt = T_RPAR; - feedc(c); - } else if (c.nc == '|') { - c.tt = T_ALT; - feedc(c); - } else if (c.nc == '=') { - c.tt = T_EQUALS; - feedc(c); - } else if (c.nc == ';') { - c.tt = T_SEMI; - feedc(c); - } else { - die("invalid char"); - } -} - -feed_ident(c: *compiler): void { - c.tt = T_IDENT; - loop { - if (!((c.nc >= 'a' && c.nc <= 'z') || - (c.nc >= 'A' && c.nc <= 'Z') || - (c.nc >= '0' && c.nc <= '9') || - c.nc == '_')) { - break; - } - - c.buf[c.tlen] = c.nc:byte; - c.tlen = c.tlen + 1; - if (c.tlen == c.tmax) { - die("ident too long"); - } - c.buf[c.tlen] = 0:byte; - - feedc(c); - } -} - -hexdig(c: *compiler): int { - if (c.nc >= '0' && c.nc <= '9') { - return c.nc - '0'; - } - - if (c.nc >= 'A' && c.nc <= 'F') { - return (c.nc - 'A') + 10; - } - - if (c.nc >= 'a' && c.nc <= 'f') { - return (c.nc - 'a') + 10; - } - - die("invalid hex digit"); -} - -feed_escape(c: *compiler): void { - var hex: int; - - feedc(c); - - if (c.nc == 't') { - c.nc = '\t'; - } else if (c.nc == 'r') { - c.nc = '\r'; - } else if (c.nc == 'n') { - c.nc = '\n'; - } else if (c.nc == '\\') { - c.nc = '\\'; - } else if (c.nc == '\'') { - c.nc = '\''; - } else if (c.nc == '\"') { - c.nc = '\"'; - } else if (c.nc == '-') { - c.nc = '-'; - } else if (c.nc == '[') { - c.nc = '['; - } else if (c.nc == ']') { - c.nc = ']'; - } else if (c.nc == 'x') { - feedc(c); - hex = hexdig(c) * 16; - feedc(c); - hex = hex + hexdig(c); - c.nc = hex; - } else { - die("invalid escape"); - } -} - -feed_literal(c: *compiler): void { - var a: *nfa; - - c.tt = T_LITERAL; - c.n = nfa_empty(c); - - feedc(c); - - loop { - if (c.nc == '"') { - feedc(c); - return; - } - - if (c.nc == -1 || c.nc == 0 || c.nc == '\n') { - die("invalid char in literal"); - } - - if (c.nc == '\\') { - feed_escape(c); - } - - a = nfa_literal(c, c.nc: byte); - c.n = nfa_concat(c, c.n, a); - - feedc(c); - } -} - -feed_charset(c: *compiler): void { - var left: int; - var right: int; - var mode: int; - var nonempty: int; - var i: int; - var a: *nfa; - - c.tt = T_CHARSET; - - feedc(c); - - mode = 1; - nonempty = 0; - - i = left; - loop { - if (i == 256) { - break; - } - c.buf[i] = 0: byte; - i = i + 1; - } - - loop { - if (c.nc == ']') { - feedc(c); - break; - } - - if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') { - die("invalid char in charset"); - } - - if (c.nc == '^') { - if (!mode) { - die("invalid char in charset"); - } - - feedc(c); - if (mode && !nonempty) { - i = 0; - loop { - if (i == 256) { - break; - } - c.buf[i] = 1:byte; - i = i + 1; - } - } - mode = 0; - continue; - } - - if (c.nc == '\\') { - feed_escape(c); - left = c.nc; - feedc(c); - } else { - left = c.nc; - feedc(c); - } - - if (c.nc == '-') { - feedc(c); - - if (c.nc == -1 || c.nc == 0 || c.nc == '\n' || c.nc == '-') { - die("invalid char in charset"); - } - - if (c.nc == '\\') { - feed_escape(c); - right = c.nc + 1; - feedc(c); - } else { - right = c.nc + 1; - feedc(c); - } - } else { - right = left + 1; - } - - nonempty = 1; - i = left; - loop { - if (i >= right) { - break; - } - c.buf[i] = mode: byte; - i = i + 1; - } - } - - c.n = nfa_empty(c); - c.n.left = 256; - c.n.right = 256; - - i = 0; - loop { - loop { - if (i == 256) { - left = 256; - break; - } - - if (c.buf[i]) { - left = i; - break; - } - - i = i + 1; - } - - loop { - if (i == 256) { - right = 256; - break; - } - - if (!c.buf[i]) { - right = i; - break; - } - - i = i + 1; - } - - if (left >= right) { - break; - } - - a = nfa_empty(c); - a.left = left; - a.right = right; - - c.n = nfa_alt(c, c.n, a); - } -} - -parse_ident(c: *compiler): *tag { - var t: *tag; - if (c.tt != T_IDENT) { - return 0: *tag; - } - t = find_tag(c, c.buf); - feed(c); - return t; -} - -struct tag { - next: *tag; - s: *byte; - id: int; -} - -intern(c: *compiler): *byte { - var s: *byte; - var i: int; - s = alloc(&c.a, c.tlen + 1); - i = 0; - loop { - if (i == c.tlen) { - break; - } - s[i] = c.buf[i]; - i = i + 1; - } - s[c.tlen] = 0:byte; - return s; -} - -find_tag(c: *compiler, s: *byte): *tag { - var t: *tag; - var link: **tag; - link = &c.tags; - loop { - t = *link; - if (!t) { - break; - } - if (!strcmp(t.s, s)) { - return t; - } - link = &t.next; - } - t = alloc(&c.a, sizeof(*t)): *tag; - t.next = 0:*tag; - t.s = intern(c); - t.id = c.ntags; - c.ntags = c.ntags + 1; - *link = t; - return t; -} - -struct nfa { - id: int; - tag: *tag; - left: int; - right: int; - live: int; - a: *nfa; - b: *nfa; - end: *nfa; -} - -nfa_empty(c: *compiler): *nfa { - var n: *nfa; - n = alloc(&c.a, sizeof(*n)):*nfa; - n.id = c.nnfa; - n.left = -1; - n.right = -1; - n.live = 0; - n.a = 0:*nfa; - n.b = 0:*nfa; - n.end = n; - c.nnfa = c.nnfa + 1; - return n; -} - -nfa_literal(c: *compiler, a: byte): *nfa { - var n: *nfa; - n = nfa_empty(c); - n.left = a:int; - n.right = n.left + 1; - return n; -} - -nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa { - var i: *nfa; - var o: *nfa; - i = nfa_empty(c); - o = nfa_empty(c); - i.a = a; - i.b = b; - i.end = o; - a.end.a = o; - b.end.a = o; - return i; -} - -nfa_concat(c: *compiler, a: *nfa, b: *nfa): *nfa { - a.end.a = b; - a.end = b.end; - return a; -} - -nfa_plus(c: *compiler, a: *nfa): *nfa { - var o: *nfa; - o = nfa_empty(c); - o.b = a; - a.end.a = o; - a.end = o; - return a; -} - -nfa_qmark(c: *compiler, a: *nfa): *nfa { - var b: *nfa; - b = nfa_empty(c); - a = nfa_alt(c, a, b); - return a; -} - -nfa_star(c: *compiler, a: *nfa): *nfa { - a = nfa_plus(c, a); - a = nfa_qmark(c, a); - return a; -} - -parse_literal(c: *compiler): *nfa { - var n: *nfa; - if (c.tt != T_LITERAL) { - return 0:*nfa; - } - n = c.n; - feed(c); - return n; -} - -parse_charset(c: *compiler): *nfa { - var n: *nfa; - if (c.tt != T_CHARSET) { - return 0:*nfa; - } - n = c.n; - feed(c); - return n; -} - -parse_dot(c: *compiler): *nfa { - var n: *nfa; - if (c.tt != T_DOT) { - return 0:*nfa; - } - feed(c); - n = nfa_empty(c); - n.left = 0; - n.right = 256; - return n; -} - -// primary := literal -// | charset -// | dot -// | '(' regex ')' -parse_primary(c: *compiler): *nfa { - var n: *nfa; - - n = parse_literal(c); - if (n) { - return n; - } - - n = parse_charset(c); - if (n) { - return n; - } - - n = parse_dot(c); - if (n) { - return n; - } - - if (c.tt == T_LPAR) { - feed(c); - - n = parse_regex(c); - - if (c.tt != T_RPAR) { - die("expected )"); - } - feed(c); - - return n; - } - - return 0: *nfa; -} - -// post := primary -// | post '*' -// | post '+' -// | post '?' -parse_post(c: *compiler): *nfa { - var n: *nfa; - - n = parse_primary(c); - if (!n) { - return 0: *nfa; - } - - loop { - if (c.tt == T_STAR) { - n = nfa_star(c, n); - feed(c); - } else if (c.tt == T_PLUS) { - n = nfa_plus(c, n); - feed(c); - } else if (c.tt == T_QMARK) { - n = nfa_qmark(c, n); - feed(c); - } else { - return n; - } - } -} - -// concat := post -// | post concat -parse_concat(c: *compiler): *nfa { - var n: *nfa; - var b: *nfa; - - n = parse_post(c); - if (!n) { - return 0:*nfa; - } - - loop { - b = parse_post(c); - if (!b) { - return n; - } - - n = nfa_concat(c, n, b); - } -} - -// regex := concat -// | '|' regex -// | concat '|' regex -parse_regex(c: *compiler): *nfa { - var n: *nfa; - var b: *nfa; - - n = parse_concat(c); - if (!n) { - n = nfa_empty(c); - } - - loop { - if (c.tt != T_ALT) { - return n; - } - feed(c); - - b = parse_concat(c); - if (!b) { - b = nfa_empty(c); - } - - n = nfa_alt(c, n, b); - } -} - -// decl := ident '=' regex ';' -parse_decl(c: *compiler): *nfa { - var regex: *nfa; - var t: *tag; - var n: *nfa; - - t = parse_ident(c); - if (!t) { - return 0: *nfa; - } - - if (c.tt != T_EQUALS) { - die("expected ="); - } - feed(c); - - regex = parse_regex(c); - - if (c.tt != T_SEMI) { - die("expected ;"); - } - feed(c); - - n = nfa_empty(c); - n.tag = t; - - return nfa_concat(c, regex, n); -} - -// progam := decl -// | decl program -parse_program(c: *compiler): *nfa { - var n: *nfa; - var p: *nfa; - - p = 0: *nfa; - loop { - n = parse_decl(c); - if (!n) { - if (c.tt != T_EOF) { - die("expected eof"); - } - return p; - } - - if (p) { - p = nfa_alt(c, p, n); - } else { - p = n; - } - } -} - -struct dfa { - id: int; - link: **dfa; - key: nlist; - l: *dfa; - r: *dfa; - seen: int; -} - -struct nlist { - live: **nfa; - fill: int; - cap: int; - tag: *tag; -} - -alloc_nlist(c: *compiler, l: *nlist, cap: int): void { - l.cap = cap; - l.live = alloc(&c.a, sizeof(*l.live) * cap):**nfa; - l.fill = 0; - l.tag = 0:*tag; -} - -activate(l: *nlist, n: *nfa): void { - if (n.live) { - return; - } - - n.live = 1; - - l.live[l.fill] = n; - l.fill = l.fill + 1; - - if (n.left == -1) { - if ((!l.tag) || (n.tag && n.tag.id < l.tag.id)) { - l.tag = n.tag; - } - - if (n.a) { - activate(l, n.a); - } - - if (n.b) { - activate(l, n.b); - } - } -} - -nlist_cmp(a: *nlist, b: *nlist): int { - var i: int; - - i = 0; - loop { - if (i == a.fill && i == b.fill) { - break; - } - - if (i == a.fill) { - return -1; - } - - if (i == b.fill) { - return 1; - } - - if (a.live[i].id > b.live[i].id) { - return 1; - } - - if (a.live[i].id < b.live[i].id) { - return -1; - } - - i = i + 1; - } - - if (!a.tag && !b.tag) { - return 0; - } - - if (!a.tag) { - return -1; - } - - if (!b.tag) { - return 1; - } - - if (a.tag.id < b.tag.id) { - return -1; - } - - if (a.tag.id > b.tag.id) { - return 1; - } - - return 0; -} - -nlist_sort(l: *nlist): void { - var i: int; - var j: int; - var k: int; - var tmp: *nfa; - - if (l.fill < 2) { - return; - } - - i = l.fill - 1; - loop { - j = i; - loop { - k = (j << 1) + 1; - if (k >= l.fill) { - break; - } - if (k + 1 < l.fill && l.live[k].id < l.live[k + 1].id) { - k = k + 1; - } - if (l.live[j].id >= l.live[k].id) { - break; - } - tmp = l.live[k]; - l.live[k] = l.live[j]; - l.live[j] = tmp; - j = k; - } - if (i == 0) { - break; - } - i = i - 1; - } - - i = l.fill - 1; - loop { - if (i == 0) { - break; - } - tmp = l.live[i]; - l.live[i] = l.live[0]; - j = 0; - loop { - k = (j << 1) + 1; - if (k >= i) { - break; - } - if (k + 1 < i && l.live[k].id < l.live[k + 1].id) { - k = k + 1; - } - if (tmp.id >= l.live[k].id) { - break; - } - j = k; - } - l.live[j] = tmp; - i = i - 1; - } -} - -alloc_link(c: *compiler): **dfa { - var link: **dfa; - var i: int; - link = alloc(&c.a, sizeof(*link) * 256): **dfa; - i = 0; - loop { - if (i == 256) { - break; - } - link[i] = 0:*dfa; - i = i + 1; - } - return link; -} - -nlist_copy(c: *compiler, dest: *nlist, src: *nlist): void { - var i: int; - alloc_nlist(c, dest, src.fill); - dest.fill = src.fill; - dest.tag = src.tag; - i = 0; - loop { - if (i >= src.fill) { - break; - } - dest.live[i] = src.live[i]; - i = i + 1; - } -} - -nlist2dfa(c: *compiler, l: *nlist): *dfa { - var link: **dfa; - var d: *dfa; - var n: *nfa; - var t: *tag; - var dir: int; - var i: int; - var j: int; - - if (l.fill == 0 && !l.tag) { - return 0:*dfa; - } - - link = &c.d; - loop { - d = *link; - if (!d) { - break; - } - - dir = nlist_cmp(l, &d.key); - - if (dir < 0) { - link = &d.l; - } else if (dir > 0) { - link = &d.r; - } else { - return d; - } - } - - d = alloc(&c.a, sizeof(*d)): *dfa; - d.id = c.ndfa; - d.link = alloc_link(c); - nlist_copy(c, &d.key, l); - d.l = 0: *dfa; - d.r = 0: *dfa; - d.seen = 0; - - c.ndfa = c.ndfa + 1; - - *link = d; - - i = 0; - loop { - if (i == 256) { - break; - } - - deactivate(l); - j = 0; - loop { - if (j >= d.key.fill) { - break; - } - - n = d.key.live[j]; - if (n.left <= i && i < n.right) { - if (n.a) { - activate(l, n.a); - } - if (n.b) { - activate(l, n.b); - } - } - - j = j + 1; - } - - d.link[i] = nlist2dfa(c, l); - - i = i + 1; - } - - return d; -} - -deactivate(l: *nlist): void { - var i: int; - i = 0; - loop { - if (i >= l.fill) { - l.fill = 0; - l.tag = 0: *tag; - break; - } - l.live[i].live = 0; - i = i + 1; - } -} - -powerset(c: *compiler, n: *nfa): *dfa { - var live: nlist; - alloc_nlist(c, &live, c.nnfa); - activate(&live, n); - return nlist2dfa(c, &live); -} - -codegen(c: *compiler, a: *dfa): void { - var i: int; - var b: *dfa; - var lo: int; - var hi: int; - - if (a.seen) { - return; - } - a.seen = 1; - - fdput(1, ":_"); - fdputd(1, a.id); - fdput(1, ";\n"); - - if (a.key.tag) { - fdput(1, "\tlexmark(l, T_"); - fdput(1, a.key.tag.s); - fdput(1, ");\n"); - } - - fdput(1, "\tch = lexfeedc(l);\n"); - - i = 0; - loop { - loop { - if (i == 256 || a.link[i]) { - lo = i; - break; - } - i = i + 1; - } - - if (lo == 256) { - break; - } - - b = a.link[i]; - - loop { - if (i == 256 || a.link[i] != b) { - hi = i; - break; - } - i = i + 1; - } - - if (lo != (hi - 1)) { - fdput(1, "\tif (ch >= "); - fdputd(1, lo); - fdput(1, " && ch <= "); - fdputd(1, hi - 1); - } else { - fdput(1, "\tif (ch == "); - fdputd(1, lo); - } - fdput(1, ") { "); - fdput(1, "goto _"); - fdputd(1, b.id); - fdput(1, "; }\n"); - - } - - fdput(1, "\treturn;\n"); - - i = 0; - loop { - if (i == 256) { - break; - } - - if (a.link[i]) { - codegen(c, a.link[i]); - } - - i = i + 1; - } -} - -gen(c: *compiler, a: *dfa): void { - var t: *tag; - t = c.tags; - fdput(1, "enum {\n"); - fdput(1, "\tT_invalid,\n"); - fdput(1, "\tT_eof,\n"); - loop { - if (!t) { - break; - } - fdput(1, "\tT_"); - fdput(1, t.s); - fdput(1, ",\n"); - t = t.next; - } - fdput(1, "}\n"); - fdput(1, "\n"); - fdput(1, "lexstep(l: *lex_state): void {\n"); - fdput(1, "\tvar ch: int;\n"); - fdput(1, "\tlexmark(l, T_invalid);\n"); - codegen(c, a); - fdput(1, "}\n"); -} - -main(): void { - var c: compiler; - var n: *nfa; - var a: *dfa; - setup(&c); - n = parse_program(&c); - a = powerset(&c, n); - gen(&c, a); -} diff --git a/lex1.c b/lex1.c @@ -0,0 +1,408 @@ +enum { + T_EOF, + T_IDENT, + T_NUM, + T_HEX, + T_STR, + T_CHAR, + T_LPAR, + T_RPAR, + T_LBRA, + T_RBRA, + T_COMMA, + T_SEMI, + T_COLON, + T_STAR, + T_DOT, + T_NOT, + T_ASSIGN, + T_AMP, + T_OR, + T_XOR, + T_LT, + T_GT, + T_LE, + T_GE, + T_EQ, + T_NE, + T_ADD, + T_SUB, + T_LSH, + T_RSH, + T_BANG, + T_BOR, + T_BAND, + T_LSQ, + T_RSQ, + T_DIV, + T_MOD, +} + +open_source(c: *compiler, filename: *byte) { + var fd: int; + + c.filename = filename; + c.nc = 0; + c.lineno = 1; + c.colno = 1; + c.tlen = 0; + c.tt = 0; + + fd = open(filename, 0, 0); + if (fd < 0) { + cdie(c, "failed to open file"); + } + + c.fdin = fd; + c.nc = getchar(c); + + feed(c); +} + +close_source(c: *compiler) { + if (c.fdin != 0) { + close(c.fdin); + } + c.fdin = 0; +} + +getchar(c: *compiler): int { + var b: byte; + var ret: int; + ret = read(c.fdin, &b, 1); + if (ret < 0) { + exit(3); + } + if (ret == 0) { + return -1; + } + return b: int; +} + +feedc(c: *compiler) { + c.nc = getchar(c); + if (c.nc == '\n') { + c.lineno = c.lineno + 1; + c.colno = 0; + } + c.colno = c.colno + 1; +} + +tappend(c: *compiler) { + c.token[c.tlen] = c.nc:byte; + c.tlen = c.tlen + 1; + if (c.tlen == c.tmax) { + cdie(c, "identifier too long"); + } + c.token[c.tlen] = 0:byte; + feedc(c); +} + +feed(c: *compiler) { + c.tlen = 0; + c.token[0] = 0:byte; + + loop { + if (c.nc == -1) { + // Reached the end of input + c.tt = T_EOF; + return; + } else if (c.nc == ' ' || c.nc == '\t' || c.nc == '\r') { + // Whitespace + feedc(c); + } else if (c.nc == '\n') { + // Line end + feedc(c); + } else if (c.nc == '/') { + // Comment + feedc(c); + if (c.nc == '/') { + // Read until the end of the comment + loop { + if (c.nc == '\n' || c.nc == -1) { + break; + } + feedc(c); + } + } else { + c.tt = T_DIV; + return; + } + + } else { + // Start of a real token + break; + } + } + + // Identifier + if ((c.nc >= 'a' && c.nc <= 'z') || (c.nc >= 'A' && c.nc <= 'Z') || c.nc == '_') { + feed_ident(c); + return; + } + + // String + if (c.nc == '"') { + feed_str(c); + return; + } + + // Character + if (c.nc == '\'') { + feed_char(c); + return; + } + + // Number + if (c.nc >= '0' && c.nc <= '9') { + feed_num(c); + return; + } + + // Single character tokens + if (c.nc == '(') { + c.tt = T_LPAR; + feedc(c); + } else if (c.nc == ')') { + c.tt = T_RPAR; + feedc(c); + } else if (c.nc == '{') { + c.tt = T_LBRA; + feedc(c); + } else if (c.nc == '}') { + c.tt = T_RBRA; + feedc(c); + } else if (c.nc == ',') { + c.tt = T_COMMA; + feedc(c); + } else if (c.nc == ';') { + c.tt = T_SEMI; + feedc(c); + } else if (c.nc == ':') { + c.tt = T_COLON; + feedc(c); + } else if (c.nc == '*') { + c.tt = T_STAR; + feedc(c); + } else if (c.nc == '.') { + c.tt = T_DOT; + feedc(c); + } else if (c.nc == '=') { + c.tt = T_ASSIGN; + feedc(c); + if (c.nc == '=') { + c.tt = T_EQ; + feedc(c); + } + } else if (c.nc == '&') { + c.tt = T_AMP; + feedc(c); + if (c.nc == '&') { + c.tt = T_BAND; + feedc(c); + } + } else if (c.nc == '~') { + c.tt = T_NOT; + feedc(c); + } else if (c.nc == '|') { + c.tt = T_OR; + feedc(c); + if (c.nc == '|') { + c.tt = T_BOR; + feedc(c); + } + } else if (c.nc == '^') { + c.tt = T_XOR; + feedc(c); + } else if (c.nc == '!') { + c.tt = T_BANG; + feedc(c); + if (c.nc == '=') { + c.tt = T_NE; + feedc(c); + } + } else if (c.nc == '<') { + c.tt = T_LT; + feedc(c); + if (c.nc == '<') { + c.tt = T_LSH; + feedc(c); + } else if (c.nc == '=') { + c.tt = T_LE; + feedc(c); + } + } else if (c.nc == '>') { + c.tt = T_GT; + feedc(c); + if (c.nc == '>') { + c.tt = T_RSH; + feedc(c); + } else if (c.nc == '=') { + c.tt = T_GE; + feedc(c); + } + } else if (c.nc == '[') { + c.tt = T_LSQ; + feedc(c); + } else if (c.nc == ']') { + c.tt = T_RSQ; + feedc(c); + } else if (c.nc == '+') { + c.tt = T_ADD; + feedc(c); + } else if (c.nc == '-') { + c.tt = T_SUB; + feedc(c); + } else if (c.nc == '%') { + c.tt = T_MOD; + feedc(c); + } else { + cdie(c, "invalid char"); + } +} + +feed_ident(c: *compiler) { + c.tt = T_IDENT; + loop { + if (!((c.nc >= 'a' && c.nc <= 'z') || + (c.nc >= 'A' && c.nc <= 'Z') || + (c.nc >= '0' && c.nc <= '9') || + c.nc == '_')) { + break; + } + tappend(c); + } +} + +hexdig(c: *compiler): int { + if (c.nc >= '0' && c.nc <= '9') { + return c.nc - '0'; + } + + if (c.nc >= 'A' && c.nc <= 'F') { + return (c.nc - 'F') + 10; + } + + if (c.nc >= 'a' && c.nc <= 'f') { + return (c.nc - 'a') + 10; + } + + cdie(c, "invalid hex digit"); + return 0; +} + +feed_escape(c: *compiler) { + var hex: int; + + // backslash + feedc(c); + + if (c.nc == 't') { + c.nc = '\t'; + } else if (c.nc == 'r') { + c.nc = '\r'; + } else if (c.nc == 'n') { + c.nc = '\n'; + } else if (c.nc == 'x') { + c.nc = getchar(c); + hex = hexdig(c) * 16; + + c.nc = getchar(c); + hex = hex + hexdig(c); + + c.nc = hex; + } else if (c.nc != '\\' && c.nc != '\'' && c.nc != '"') { + cdie(c, "invalid escape"); + } +} + +feed_str(c: *compiler) { + c.tt = T_STR; + + // quote + feedc(c); + + loop { + if (c.nc == '"') { + feedc(c); + return; + } + + if (c.nc == -1 || c.nc == 0 || c.nc == '\n') { + cdie(c, "invalid char in string"); + } + + if (c.tlen == c.tmax) { + cdie(c, "string too long"); + } + + if (c.nc == '\\') { + feed_escape(c); + } + + tappend(c); + } +} + +feed_char(c: *compiler) { + c.tt = T_CHAR; + + // quote + feedc(c); + + if (c.nc == 0 || c.nc == -1 || c.nc == '\'' || c.nc == '\n') { + cdie(c, "invalid char"); + } + + if (c.nc == '\\') { + feed_escape(c); + } + + tappend(c); + + if (c.nc != '\'') { + cdie(c, "expected '"); + } + + feedc(c); +} + +feed_hex(c: *compiler) { + c.tt = T_HEX; + + loop { + if (!((c.nc >= '0' && c.nc <= '9') + || (c.nc >= 'a' && c.nc <= 'f') + || (c.nc >= 'A' && c.nc <= 'F'))) { + break; + } + + tappend(c); + } + + if (c.tlen == 0) { + cdie(c, "expected hex"); + } +} + +feed_num(c: *compiler) { + c.tt = T_NUM; + + if (c.nc == '0') { + tappend(c); + + if (c.nc == 'x') { + feedc(c); + feed_hex(c); + return; + } + } + + loop { + if (!(c.nc >= '0' && c.nc <= '9')) { + break; + } + + tappend(c); + } +} diff --git a/lib.c b/lib.c @@ -0,0 +1,117 @@ +die(msg: *byte) { + fdputs(2, msg); + fdputs(2, "\n"); + exit(1); +} + +strlen(s: *byte): int { + var ret: int; + ret = 0; + loop { + if (s[ret] == 0:byte) { + return ret; + } + ret = ret + 1; + } +} + +strcmp(a: *byte, b: *byte): int { + var i: int; + + i = 0; + + loop { + if (a[i] > b[i]) { + return 1; + } + + if (a[i] < b[i]) { + return -1; + } + + if (a[i] == 0:byte) { + return 0; + } + + i = i + 1; + } +} + +fdputc(fd: int, ch: int) { + var b: byte; + var ret: int; + b = ch: byte; + ret = write(fd, &b, 1); + if (ret != 1) { + exit(3); + } +} + +fdputs(fd: int, msg: *byte) { + var len: int; + var ret: int; + var off: int; + len = strlen(msg); + off = 0; + loop { + if (off == len) { + break; + } + ret = write(fd, msg, len - off); + if (ret < 0) { + exit(3); + } + off = off + ret; + } +} + +fdputh(fd: int, n: int) { + var c: int; + var r: int; + + r = 0; + loop { + if (n == 0) { + break; + } + + r = (r << 4) + (n & 15); + n = n >> 4; + } + + n = r; + + loop { + c = n & 15; + n = n >> 4; + + if (c < 10) { + fdputc(fd, c + '0'); + } else { + fdputc(fd, c + ('a' - 10)); + } + + if (n == 0) { + break; + } + } +} + +fdputd(fd: int, n: int) { + var a: int; + + if (n < 0) { + fdputc(fd, '-'); + a = -(n % 10); + n = n / -10; + } else { + a = n % 10; + n = n / 10; + } + + if (n != 0) { + fdputd(fd, n); + } + + fdputc(fd, '0' + a); +} diff --git a/parse1.c b/parse1.c @@ -0,0 +1,1482 @@ +struct node { + kind: int; + a: *node; + b: *node; + filename: *byte; + lineno: int; + colno: int; + n: int; + s: *byte; + t: *type; +} + +enum { + N_IDENT, + N_NUM, + N_CHAR, + N_STR, + N_STMTLIST, + N_EXPRLIST, + N_CALL, + N_DOT, + N_ARGLIST, + N_FUNC, + N_ARGDECL, + N_FUNCDECL, + N_PROGRAM, + N_FUNCTYPE, + N_PTRTYPE, + N_STRUCT, + N_MEMBERDECL, + N_MEMBERLIST, + N_CONDLIST, + N_COND, + N_ENUM, + N_ENUMLIST, + N_LOOP, + N_BREAK, + N_CONTINUE, + N_RETURN, + N_VARDECL, + N_LABEL, + N_GOTO, + N_ASSIGN, + N_SIZEOF, + N_REF, + N_DEREF, + N_CAST, + N_INDEX, + N_LT, + N_GT, + N_LE, + N_GE, + N_EQ, + N_NE, + N_ADD, + N_SUB, + N_MUL, + N_LSH, + N_RSH, + N_BNOT, + N_BOR, + N_BAND, + N_AND, + N_OR, + N_XOR, + N_NOT, + N_POS, + N_NEG, + N_DIV, + N_MOD, +} + +mknode(c: *compiler, kind: int, a: *node, b: *node): *node { + var ret: *node; + ret = alloc(c.a, sizeof(*ret)):*node; + ret.kind = kind; + ret.a = a; + ret.b = b; + ret.filename = c.filename; + ret.lineno = c.lineno; + ret.colno = c.colno; + ret.n = 0; + ret.s = 0:*byte; + ret.t = 0:*type; + return ret; +} + +mknode0(c: *compiler, kind: int): *node { + return mknode(c, kind, 0:*node, 0:*node); +} + +mknode1(c: *compiler, kind: int, a: *node): *node { + return mknode(c, kind, a, 0:*node); +} + +// Copy the current token +intern(c: *compiler): *byte { + var ret: *byte; + var i: int; + + ret = alloc(c.a, c.tlen + 1); + + i = 0; + loop { + if (i == c.tlen) { + ret[i] = 0:byte; + return ret; + } + + ret[i] = c.token[i]; + + i = i + 1; + } +} + +// ident := IDENT +parse_ident(c: *compiler): *node { + var n: *node; + + if (c.tt != T_IDENT) { + return 0:*node; + } + + n = mknode0(c, N_IDENT); + n.s = intern(c); + feed(c); + + return n; +} + +parse_hex(c: *compiler): *node { + var n: *node; + var x: int; + var d: int; + var i: int; + + x = 0; + i = 0; + loop { + if (i == c.tlen) { + break; + } + + d = c.token[i]:int; + + if (d >= '0' && d <= '9') { + d = d - '0'; + } else if (d >= 'a' && d <= 'f') { + d = (d - 'a') + 10; + } else { + d = (d - 'A') + 10; + } + + x = x * 16; + x = x + d; + i = i + 1; + + if (x > 0x7fffffff) { + cdie(c, "overflow"); + } + } + + n = mknode0(c, N_NUM); + n.n = x; + feed(c); + + return n; +} + +// num := NUM +parse_num(c: *compiler): *node { + var n: *node; + var x: int; + var d: int; + var i: int; + + if (c.tt == T_HEX) { + return parse_hex(c); + } + + if (c.tt != T_NUM) { + return 0:*node; + } + + x = 0; + i = 0; + loop { + if (i == c.tlen) { + break; + } + + d = (c.token[i]:int) - '0'; + + x = x * 10; + + x = x + d; + i = i + 1; + + if (x > 0x7fffffff) { + cdie(c, "overflow"); + } + } + + n = mknode0(c, N_NUM); + n.n = x; + feed(c); + + return n; +} + +// chr := CHAR +parse_chr(c: *compiler): *node { + var n: *node; + + if (c.tt != T_CHAR) { + return 0:*node; + } + + n = mknode0(c, N_CHAR); + n.n = c.token[0]:int; + feed(c); + + return n; +} + +// str := STR +parse_str(c: *compiler): *node { + var n: *node; + + if (c.tt != T_STR) { + return 0:*node; + } + + n = mknode0(c, N_STR); + n.s = intern(c); + feed(c); + + return n; +} + +// primary := ident +// | num +// | str +// | chr +// | '(' expr ')' +parse_primary(c: *compiler): *node { + var n: *node; + + n = parse_ident(c); + if (n) { + return n; + } + + n = parse_num(c); + if (n) { + return n; + } + + n = parse_str(c); + if (n) { + return n; + } + + n = parse_chr(c); + if (n) { + return n; + } + + if (c.tt == T_LPAR) { + feed(c); + + n = parse_expr(c); + if (!n) { + cdie(c, "expected expr"); + } + + if (c.tt != T_RPAR) { + cdie(c, "expected )"); + } + feed(c); + + return n; + } + + return 0:*node; +} + +// expr_list := expr +// | expr ',' expr_list +parse_expr_list(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + + a = parse_expr(c); + if (!a) { + return 0:*node; + } + + e = mknode1(c, N_EXPRLIST, a); + n = e; + + loop { + if (c.tt != T_COMMA) { + return n; + } + feed(c); + + a = parse_expr(c); + if (!a) { + cdie(c, "expected expr"); + } + + e.b = mknode1(c, N_EXPRLIST, a); + e = e.b; + } +} + +// post_expr := primary +// | 'sizeof' '(' expr ')' +// | post_expr '[' expr ']' +// | post_expr '(' expr_list ')' +// | post_expr '.' ident +// | post_expr ':' type +parse_post_expr(c: *compiler): *node { + var n: *node; + var b: *node; + + if (c.tt == T_IDENT && !strcmp(c.token, "sizeof")) { + feed(c); + + if (c.tt != T_LPAR) { + cdie(c, "expected ("); + } + feed(c); + + n = parse_expr(c); + if (!n) { + cdie(c, "expected expr"); + } + + if (c.tt != T_RPAR) { + cdie(c, "expected )"); + } + feed(c); + + return mknode1(c, N_SIZEOF, n); + } + + n = parse_primary(c); + if (!n) { + return 0:*node; + } + + loop { + if (c.tt == T_LSQ) { + feed(c); + + b = parse_expr(c); + + if (c.tt != T_RSQ) { + cdie(c, "expected ]"); + } + feed(c); + + n = mknode(c, N_INDEX, n, b); + } else if (c.tt == T_LPAR) { + feed(c); + + b = parse_expr_list(c); + + if (c.tt != T_RPAR) { + cdie(c, "expected )"); + } + feed(c); + + n = mknode(c, N_CALL, n, b); + } else if (c.tt == T_DOT) { + feed(c); + + b = parse_ident(c); + if (!b) { + cdie(c, "expected ident"); + } + + n = mknode(c, N_DOT, n, b); + } else if (c.tt == T_COLON) { + feed(c); + + b = parse_type(c); + if (!b) { + cdie(c, "expected type"); + } + + n = mknode(c, N_CAST, n, b); + } else { + return n; + } + } +} + +// unary_expr := post_expr +// | '&' unary_expr +// | '*' unary_expr +// | '+' unary_expr +// | '-' unary_expr +// | '~' unary_expr +// | '!' unary_expr +parse_unary_expr(c: *compiler): *node { + var n: *node; + + if (c.tt == T_AMP) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_REF, n); + } + + if (c.tt == T_STAR) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_DEREF, n); + } + + if (c.tt == T_ADD) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_POS, n); + } + + if (c.tt == T_SUB) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_NEG, n); + } + + if (c.tt == T_NOT) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_NOT, n); + } + + if (c.tt == T_BANG) { + feed(c); + + n = parse_unary_expr(c); + if (!n) { + cdie(c, "expected unary_expr"); + } + + return mknode1(c, N_BNOT, n); + } + + return parse_post_expr(c); +} + + +// shift_expr := unary_expr +// | shift_expr '<<' unary_expr +// | shift_expr '>>' unary_expr +parse_shift_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_unary_expr(c); + if (!a) { + return 0:*node; + } + + loop { + if (c.tt == T_LSH) { + feed(c); + + b = parse_unary_expr(c); + if (!b) { + cdie(c, "expected unary_expr"); + } + + a = mknode(c, N_LSH, a, b); + } else if (c.tt == T_RSH) { + feed(c); + + b = parse_unary_expr(c); + if (!b) { + cdie(c, "expected unary_expr"); + } + + a = mknode(c, N_RSH, a, b); + } else { + return a; + } + } +} + +// mul_expr := shift_expr +// | mul_expr '*' shift_expr +// | mul_expr '/' shift_expr +// | mul_expr '%' shift_expr +// | mul_expr '&' shift_expr +parse_mul_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_shift_expr(c); + if (!a) { + return 0:*node; + } + + loop { + if (c.tt == T_STAR) { + feed(c); + + b = parse_shift_expr(c); + if (!b) { + cdie(c, "expected shift_expr"); + } + + a = mknode(c, N_MUL, a, b); + } else if (c.tt == T_DIV) { + feed(c); + + b = parse_shift_expr(c); + if (!b) { + cdie(c, "expected shift_expr"); + } + + a = mknode(c, N_DIV, a, b); + } else if (c.tt == T_MOD) { + feed(c); + + b = parse_shift_expr(c); + if (!b) { + cdie(c, "expected shift_expr"); + } + + a = mknode(c, N_MOD, a, b); + } else if (c.tt == T_AMP) { + feed(c); + + b = parse_shift_expr(c); + if (!b) { + cdie(c, "expected shift_expr"); + } + + a = mknode(c, N_AND, a, b); + } else { + return a; + } + } +} + +// add_expr := mul_expr +// | add_expr '+' mul_expr +// | add_expr '-' mul_expr +// | add_expr '|' mul_expr +// | add_expr '^' mul_expr +parse_add_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_mul_expr(c); + if (!a) { + return 0:*node; + } + + loop { + if (c.tt == T_ADD) { + feed(c); + + b = parse_mul_expr(c); + if (!b) { + cdie(c, "expected mul_expr"); + } + + a = mknode(c, N_ADD, a, b); + } else if (c.tt == T_SUB) { + feed(c); + + b = parse_mul_expr(c); + if (!b) { + cdie(c, "expected mul_expr"); + } + + a = mknode(c, N_SUB, a, b); + } else if (c.tt == T_OR) { + feed(c); + + b = parse_mul_expr(c); + if (!b) { + cdie(c, "expected mul_expr"); + } + + a = mknode(c, N_OR, a, b); + } else if (c.tt == T_XOR) { + feed(c); + + b = parse_mul_expr(c); + if (!b) { + cdie(c, "expected mul_expr"); + } + + a = mknode(c, N_XOR, a, b); + } else { + return a; + } + } +} + +// comp_expr := add_expr +// | add_expr '<' add_expr +// | add_expr '>' add_expr +// | add_expr '<=' add_expr +// | add_expr '>=' add_expr +// | add_expr '==' add_expr +// | add_expr '!=' add_expr +parse_comp_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_add_expr(c); + if (!a) { + return 0:*node; + } + + if (c.tt == T_LT) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_LT, a, b); + } + + if (c.tt == T_GT) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_GT, a, b); + } + + if (c.tt == T_LE) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_LE, a, b); + } + + if (c.tt == T_GE) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_GE, a, b); + } + + if (c.tt == T_EQ) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_EQ, a, b); + } + + if (c.tt == T_NE) { + feed(c); + + b = parse_add_expr(c); + if (!b) { + cdie(c, "expected add_expr"); + } + + return mknode(c, N_NE, a, b); + } + + return a; +} + +// bool_expr := bool_expr +// | bool_expr '&&' comp_expr +// | bool_expr '||' comp_expr +parse_bool_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_comp_expr(c); + if (!a) { + return 0:*node; + } + + if (c.tt == T_BAND) { + feed(c); + + b = parse_bool_expr(c); + if (!b) { + cdie(c, "expected bool_expr"); + } + + return mknode(c, N_BAND, a, b); + } + + if (c.tt == T_BOR) { + feed(c); + + b = parse_bool_expr(c); + if (!b) { + cdie(c, "expected bool_expr"); + } + + return mknode(c, N_BOR, a, b); + } + + return a; +} + +// expr := bool_expr +// | bool_expr '=' expr +parse_expr(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_bool_expr(c); + if (!a) { + return 0:*node; + } + + if (c.tt == T_ASSIGN) { + feed(c); + + b = parse_expr(c); + if (!b) { + cdie(c, "expected expr"); + } + + return mknode(c, N_ASSIGN, a, b); + } + + return a; +} + +// if_stmt := 'if' expr '{' stmt_list '}' +// | 'if' expr '{' stmt_list '}' 'else' if_stmt +// | 'if' expr '{' stmt_list '}' 'else' '{' stmt_list '}' +parse_if_stmt(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + var b: *node; + + if (c.tt != T_IDENT || strcmp(c.token, "if")) { + return 0:*node; + } + feed(c); + + n = mknode0(c, N_CONDLIST); + e = n; + + loop { + a = parse_expr(c); + if !a { + cdie(c, "expected expr"); + } + + if (c.tt != T_LBRA) { + cdie(c, "expected {"); + } + feed(c); + + b = parse_stmt_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + e.a = mknode(c, N_COND, a, b); + + if (c.tt != T_IDENT || strcmp(c.token, "else")) { + return n; + } + feed(c); + + if (c.tt == T_LBRA) { + feed(c); + + b = parse_stmt_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + e.b = mknode1(c, N_CONDLIST, mknode(c, N_COND, 0:*node, b)); + + return n; + } + + if (c.tt != T_IDENT || strcmp(c.token, "if")) { + cdie(c, "expected if"); + } + feed(c); + + e.b = mknode0(c, N_CONDLIST); + e = e.b; + } +} + +// loop_stmt := 'loop' '{' stmt_list '}' +parse_loop_stmt(c: *compiler): *node { + var a: *node; + + if (c.tt != T_IDENT || strcmp(c.token, "loop")) { + return 0:*node; + } + feed(c); + + if (c.tt != T_LBRA) { + cdie(c, "expected {"); + } + feed(c); + + a = parse_stmt_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + return mknode1(c, N_LOOP, a); +} + +// break_stmt := 'break' +parse_break_stmt(c: *compiler): *node { + if (c.tt != T_IDENT || strcmp(c.token, "break")) { + return 0:*node; + } + feed(c); + + return mknode0(c, N_BREAK); +} + +// continue_stmt := 'continue' +parse_continue_stmt(c: *compiler): *node { + if (c.tt != T_IDENT || strcmp(c.token, "continue")) { + return 0:*node; + } + feed(c); + + return mknode0(c, N_CONTINUE); +} + +// return_stmt := 'return' +// | 'return' expr +parse_return_stmt(c: *compiler): *node { + var a: *node; + + if (c.tt != T_IDENT || strcmp(c.token, "return")) { + return 0:*node; + } + feed(c); + + a = parse_expr(c); + + return mknode1(c, N_RETURN, a); +} + +// var_decl := ident ':' type +parse_var_decl(c: *compiler): *node { + var n: *node; + var a: *node; + var b: *node; + + a = parse_ident(c); + if (!a) { + return 0:*node; + } + + if (c.tt != T_COLON) { + cdie(c, "expected :"); + } + feed(c); + + b = parse_type(c); + if (!b) { + cdie(c, "expected type"); + } + + return mknode(c, N_VARDECL, a, b); +} + +// var_stmt := 'var' var_decl +parse_var_stmt(c: *compiler): *node { + var a: *node; + + if (c.tt != T_IDENT || strcmp(c.token, "var")) { + return 0:*node; + } + feed(c); + + a = parse_var_decl(c); + if (!a) { + cdie(c, "expected var_decl"); + } + + return a; +} + +// label_stmt := ':' ident +parse_label_stmt(c: *compiler): *node { + var a: *node; + + if (c.tt != T_COLON) { + return 0:*node; + } + feed(c); + + a = parse_ident(c); + if (!a) { + cdie(c, "expected ident"); + } + + return mknode1(c, N_LABEL, a); +} + +// goto_stmt := 'goto' ident +parse_goto_stmt(c: *compiler): *node { + var a: *node; + + if (c.tt != T_IDENT || strcmp(c.token, "goto")) { + return 0:*node; + } + feed(c); + + a = parse_ident(c); + if (!a) { + cdie(c, "expected ident"); + } + + return mknode1(c, N_GOTO, a); +} + +// stmt := if_stmt +// | loop_stmt +// | break_stmt ';' +// | continue_stmt ';' +// | return_stmt ';' +// | var_stmt ';' +// | label_stmt ';' +// | goto_stmt ';' +// | expr ';' +parse_stmt(c: *compiler): *node { + var n: *node; + + n = parse_if_stmt(c); + if (n) { + return n; + } + + n = parse_loop_stmt(c); + if (n) { + return n; + } + + n = parse_break_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_return_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_continue_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_var_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_label_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_goto_stmt(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + n = parse_expr(c); + if (n) { + if (c.tt != T_SEMI) { + cdie(c, "expected ;"); + } + feed(c); + + return n; + } + + return 0:*node; +} + +// stmt_list := stmt +// | stmt stmt_list +parse_stmt_list(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + + a = parse_stmt(c); + if (!a) { + return 0:*node; + } + + e = mknode1(c, N_STMTLIST, a); + n = e; + + loop { + a = parse_stmt(c); + if (!a) { + return n; + } + + e.b = mknode1(c, N_STMTLIST, a); + e = e.b; + } +} + +// enum_list := ident +// | enum_list ',' enum_list +parse_enum_list(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + + a = parse_ident(c); + if (!a) { + return 0:*node; + } + + e = mknode1(c, N_ENUMLIST, a); + n = e; + + loop { + if (c.tt != T_COMMA) { + return n; + } + feed(c); + + a = parse_ident(c); + if (!a) { + return n; + } + + e.b = mknode1(c, N_ENUMLIST, a); + e = e.b; + } +} + +// enum_decl := 'enum' ident '{' enum_list '}' +parse_enum_decl(c: *compiler): *node { + var b: *node; + + if (c.tt != T_IDENT) { + return 0:*node; + } + + if (strcmp(c.token, "enum")) { + return 0:*node; + } + feed(c); + + if (c.tt != T_LBRA) { + cdie(c, "expected {"); + } + feed(c); + + b = parse_enum_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + return mknode(c, N_ENUM, 0:*node, b); +} + +// type := ident +// | '*' type +// | '(' type ')' +// | 'func' func_type +parse_type(c: *compiler): *node { + var n: *node; + + if (c.tt == T_STAR) { + feed(c); + + n = parse_type(c); + + return mknode1(c, N_PTRTYPE, n); + } + + if (c.tt == T_LPAR) { + feed(c); + + n = parse_type(c); + + if (c.tt != T_RPAR) { + cdie(c, "expected )"); + } + feed(c); + + return n; + } + + if (c.tt == T_IDENT && !strcmp(c.token, "func")) { + feed(c); + + n = parse_func_type(c); + if (!n) { + cdie(c, "expected func_type"); + } + + return n; + } + + return parse_ident(c); +} + +// member_decl := ident ':' type +parse_member_decl(c: *compiler): *node { + var n: *node; + var a: *node; + var b: *node; + + a = parse_ident(c); + if (!a) { + return 0: *node; + } + + if (c.tt != T_COLON) { + cdie(c, "expected :"); + } + feed(c); + + b = parse_type(c); + if (!b) { + cdie(c, "expected type"); + } + + return mknode(c, N_MEMBERDECL, a, b); +} + +// member_list := member_decl +// | member_decl ';' member_list +parse_member_list(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + + a = parse_member_decl(c); + if (!a) { + return 0:*node; + } + + e = mknode1(c, N_MEMBERLIST, a); + n = e; + + loop { + if (c.tt != T_SEMI) { + return n; + } + feed(c); + + a = parse_member_decl(c); + if (!a) { + return n; + } + + e.b = mknode1(c, N_MEMBERLIST, a); + e = e.b; + } +} + +// struct_decl := 'struct' ident '{' member_list '}' +parse_struct_decl(c: *compiler): *node { + var a: *node; + var b: *node; + + if (c.tt != T_IDENT) { + return 0:*node; + } + + if (strcmp(c.token, "struct")) { + return 0:*node; + } + feed(c); + + a = parse_ident(c); + if (!a) { + cdie(c, "expected ident"); + } + + if (c.tt != T_LBRA) { + cdie(c, "expected {"); + } + feed(c); + + b = parse_member_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + return mknode(c, N_STRUCT, a, b); +} + +// arg_decl := ':' type +// ident ':' type +parse_arg_decl(c: *compiler): *node { + var n: *node; + var a: *node; + var b: *node; + + a = parse_ident(c); + if (!a) { + return 0:*node; + } + + if (c.tt != T_COLON) { + cdie(c, "expected :"); + } + feed(c); + + b = parse_type(c); + if (!b) { + cdie(c, "expected type"); + } + + return mknode(c, N_ARGDECL, a, b); +} + +// arg_list := arg_decl +// | arg_decl ',' arg_list +parse_arg_list(c: *compiler): *node { + var n: *node; + var e: *node; + var a: *node; + + a = parse_arg_decl(c); + if (!a) { + return 0:*node; + } + + e = mknode1(c, N_ARGLIST, a); + n = e; + + loop { + if (c.tt != T_COMMA) { + return n; + } + feed(c); + + a = parse_arg_decl(c); + if (!a) { + cdie(c, "expected identifier"); + } + + e.b = mknode1(c, N_ARGLIST, a); + e = e.b; + } +} + +// func_type := '(' ')' ':' type +// | '(' arg_list ')' ':' type +// | '(' ')' +// | '(' arg_list ')' +parse_func_type(c: *compiler): *node { + var a: *node; + var b: *node; + + if (c.tt != T_LPAR) { + return 0: *node; + } + feed(c); + + a = parse_arg_list(c); + + if (c.tt != T_RPAR) { + cdie(c, "expected )"); + } + feed(c); + + if (c.tt != T_COLON) { + return mknode1(c, N_FUNCTYPE, a); + } + feed(c); + + b = parse_type(c); + if (!b) { + cdie(c, "expected type"); + } + + return mknode(c, N_FUNCTYPE, a, b); +} + +// func_decl := ident func_type +parse_func_decl(c: *compiler): *node { + var a: *node; + var b: *node; + + a = parse_ident(c); + if (!a) { + return 0:*node; + } + + b = parse_func_type(c); + if (!b) { + cdie(c, "expected func_type"); + } + + return mknode(c, N_FUNCDECL, a, b); +} + +// func := func_decl '{' stmt_list '}' +// | func_decl ';' +parse_func(c: *compiler): *node { + var n: *node; + var a: *node; + var b: *node; + + a = parse_func_decl(c); + if (!a) { + return 0:*node; + } + + if (c.tt == T_SEMI) { + feed(c); + return a; + } + + if (c.tt != T_LBRA) { + cdie(c, "expected {"); + } + feed(c); + + b = parse_stmt_list(c); + + if (c.tt != T_RBRA) { + cdie(c, "expected }"); + } + feed(c); + + return mknode(c, N_FUNC, a, b); +} + +// decl := enum_decl +// | struct_decl +// | func +parse_decl(c: *compiler): *node { + var n: *node; + + n = parse_enum_decl(c); + if (n) { + return n; + } + + n = parse_struct_decl(c); + if (n) { + return n; + } + + return parse_func(c); +} + +// program := decl +// | decl program +parse_program(c: *compiler, p: *node): *node { + var n: *node; + var e: *node; + var d: *node; + + d = parse_decl(c); + if (!d) { + if (c.tt != T_EOF) { + cdie(c, "expected eof"); + } + return p; + } + + e = mknode1(c, N_PROGRAM, d); + n = e; + + loop { + d = parse_decl(c); + if (!d) { + if (c.tt != T_EOF) { + cdie(c, "expected eof"); + } + + e.b = p; + return n; + } + + e.b = mknode1(c, N_PROGRAM, d); + e = e.b; + } +} diff --git a/syscall.c b/syscall.c @@ -0,0 +1,34 @@ +syscall(n: int, a1: int, a2: int, a3: int, a4: int, a5: int, a6: int): int; + +_start(argc: int, argv: **byte, envp: **byte) { + main(argc, argv, envp); + exit(0); +} + +read(fd: int, buf: *byte, n: int): int { + return syscall(0, fd, buf: int, n, 0, 0, 0); +} + +write(fd: int, buf: *byte, n: int): int { + return syscall(1, fd, buf: int, n, 0, 0, 0); +} + +open(name: *byte, flags: int, mode: int): int { + return syscall(2, name: int, flags, mode, 0, 0, 0); +} + +close(fd: int): int { + return syscall(3, fd, 0, 0, 0, 0, 0); +} + +mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int { + return syscall(9, addr, len, prot, flags, fd, off); +} + +exit(n: int) { + syscall(60, n, 0, 0, 0, 0, 0); +} + +unlink(name: *byte): int { + return syscall(87, name: int, 0, 0, 0, 0, 0); +} diff --git a/type.c b/type.c @@ -0,0 +1,186 @@ +struct type { + kind: int; + st: *decl; + val: *type; + arg: *type; +} + +enum { + TY_VOID, + TY_INT, + TY_BYTE, + TY_PTR, + TY_ARG, + TY_FUNC, + TY_STRUCT, +} + +type_sizeof(c: *compiler, t: *type): int { + var kind: int; + + kind = t.kind; + if (kind == TY_INT) { + return 8; + } else if (kind == TY_BYTE) { + return 8; + } else if (kind == TY_PTR) { + return 8; + } else if (kind == TY_FUNC) { + return 8; + } else if (kind == TY_STRUCT) { + layout_struct(c, t.st); + return t.st.struct_size; + } else { + cdie(c, "sizeof: invalid type"); + } +} + +// Unify two types +unify(c: *compiler, a: *type, b: *type) { + var kind: int; + + if (a == b) { + return; + } + + if ((a && !b) || (b && !a) || a.kind != b.kind) { + cdie(c, "type error"); + } + + kind = a.kind; + if (kind == TY_PTR) { + unify(c, a.val, b.val); + } else if (kind == TY_FUNC) { + unify(c, a.val, b.val); + unify(c, a.arg, b.arg); + } else if (kind == TY_ARG) { + unify(c, a.val, b.val); + unify(c, a.arg, b.arg); + } else if (kind == TY_STRUCT) { + if (a.st != b.st) { + cdie(c, "type error"); + } + } else if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE) { + cdie(c, "unify: invalid type"); + } +} + +count_args(c: *compiler, t: *type): int { + var nargs: int; + + nargs = 0; + loop { + if (!t) { + break; + } + + t = t.arg; + nargs = nargs + 1; + } + + return nargs; +} + +mktype(c: *compiler, kind: int, a: *type, b: *type, st: *decl): *type { + var t: *type; + + t = alloc(c.a, sizeof(*t)):*type; + + t.kind = kind; + t.st = st; + t.val = a; + t.arg = b; + + return t; +} + +mktype_struct(c: *compiler, st: *decl): *type { + return mktype(c, TY_STRUCT, 0:*type, 0:*type, st); +} + +mktype0(c: *compiler, kind: int): *type { + return mktype(c, kind, 0:*type, 0:*type, 0:*decl); +} + +mktype1(c: *compiler, kind: int, a: *type): *type { + return mktype(c, kind, a, 0:*type, 0:*decl); +} + +mktype2(c: *compiler, kind: int, a: *type, b: *type): *type { + return mktype(c, kind, a, b, 0:*decl); +} + +prototype(c: *compiler, n: *node): *type { + var a: *type; + var b: *type; + var st: *decl; + var kind: int; + + if (!n) { + return 0:*type; + } + + c.lineno = n.lineno; + c.colno = 0; + + kind = n.kind; + if (kind == N_IDENT) { + if (!strcmp(n.s, "void")) { + return mktype0(c, TY_VOID); + } + + if (!strcmp(n.s, "int")) { + return mktype0(c, TY_INT); + } + + if (!strcmp(n.s, "byte")) { + return mktype0(c, TY_BYTE); + } + + st = find(c, n.s, 0:*byte, 0); + if (!st || !st.struct_defined) { + cdie(c, "unknown struct"); + } + + return mktype_struct(c, st); + } else if (kind == N_ARGLIST) { + a = prototype(c, n.a.b); + b = prototype(c, n.b); + + kind = a.kind; + if (kind != TY_INT && kind != TY_BYTE + && kind != TY_PTR && kind != TY_FUNC) { + cdie(c, "not a ptr arg"); + } + + return mktype2(c, TY_ARG, a, b); + } else if (kind == N_FUNCTYPE) { + if (n.b) { + a = prototype(c, n.b); + } else{ + a = mktype0(c, TY_VOID); + } + + b = prototype(c, n.a); + + kind = a.kind; + if (kind != TY_VOID && kind != TY_INT && kind != TY_BYTE + && kind != TY_PTR && kind != TY_FUNC) { + cdie(c, "not a ptr return"); + } + + return mktype2(c, TY_FUNC, a, b); + } else if (kind == N_PTRTYPE) { + return mktype1(c, TY_PTR, prototype(c, n.a)); + } else { + cdie(c, "prototype: invalid type"); + } +} + +type_isint(t: *type): int { + return t.kind == TY_INT || t.kind == TY_BYTE; +} + +type_isprim(t: *type): int { + return t.kind != TY_VOID && t.kind != TY_STRUCT; +}