os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 51b076d69589388ca26d29debfc47f1193a481dd
parent 3cdca44786443b4142a7a98be8476b890a29a03c
Author: erai <erai@omiltem.net>
Date:   Sun,  9 Feb 2025 00:28:30 +0000

directly generate ir in peg

Diffstat:
Mbootstrap.sh | 7++-----
Mbuild.sh | 5++---
Mcc1.om | 275++++++++++++++++++++++++++-----------------------------------------------------
Mcc3.om | 3+++
Mdecl.om | 13-------------
Mir.om | 139+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mircout.om | 104++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mparse2.om | 10+++++-----
Mpeg.om | 699++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
9 files changed, 792 insertions(+), 463 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh @@ -1,15 +1,12 @@ #!/bin/sh -set -x - BOOTSTRAP="cc0.c" LIBS="bufio.om lib.om alloc.om syscall.om" SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om" # Build the bootstrap compiler from c -[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c99 ${BOOTSTRAP} -o cc0 -./cc0 cc3.om -P parse3.om -./cc0 ${LIBS} ${SOURCES} parse3.om -o cc1 -n cc1.lines -G cc1.call +[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c89 ${BOOTSTRAP} -o cc0 +./cc0 ${LIBS} ${SOURCES} cc3.om -o cc1 -n cc1.lines -G cc1.call # Double check the bootstrap and self hosting compiler have the same output ./cc1 ${LIBS} ${SOURCES} cc3.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call diff --git a/build.sh b/build.sh @@ -3,12 +3,11 @@ LIBS="peglib.om bufio.om lib.om alloc.om syscall.om" CRYPTO="ed25519.om sha512.om sha256.om chacha20.om poly1305.om" CC="cc1.om type.om as.om decl.om node.om" -PEG="peg.om parsepeg.om" SSHD="chacha20.om poly1305.om sha256.om sha512.om ed25519.om sshd.om" KERNEL="kernel.om" SHELL="echo.om cmp.om rm.om ls.om cat.om xxd.om mv.om mkdir.om cpio.om sh.om" -BIN="echo cmp rm ls cat xxd mv mkdir cpio sh sshd init cc1 build.sh peg.peg cc3.peg" -ALL="${LIBS} ${CC} ${PEG} ${SSHD} ${KERNEL} ${SHELL} ${BIN}" +BIN="echo cmp rm ls cat xxd mv mkdir cpio sh sshd init cc1 build.sh cc3.om" +ALL="${LIBS} ${CC} ${SSHD} ${KERNEL} ${SHELL} ${BIN}" ./cc1 ${LIBS} echo.om -o echo -n echo.lines -G echo.call ./cc1 ${LIBS} cmp.om -o cmp -n cmp.lines -G cmp.call diff --git a/cc1.om b/cc1.om @@ -62,7 +62,7 @@ func comp_setup(a: *alloc, err: *file): *compiler { c.err = err; - c.p = setup_parser(a, err); + c.p = setup_parser(c, err); c.filename = nil; c.lineno = 0; @@ -156,18 +156,23 @@ func compile(c: *compiler, p: *node) { d = next_decl(c, d); } - // Typecheck functions - d = first_decl(c); + // Typecheck functions and translate to ir + n = p; loop { - if (!d) { + if (!n) { break; } - if d.func_defined { - typecheck_func(c, d); + kind = n.a.kind; + if 
(kind == N_FUNC) { + d = find(c, n.a.a.a.s, nil, 0); + if n.a.b { + typecheck_stmt(c, d, n.a.b); + } + d.func_ir = func_to_ir(c, n.a); } - d = next_decl(c, d); + n = n.b; } // Check usage @@ -203,20 +208,6 @@ func compile(c: *compiler, p: *node) { check_usage(c); - // Translate each function to ir - d = first_decl(c); - loop { - if (!d) { - break; - } - - if d.func_used && d.func_defined && d.func_def { - d.func_ir = func_to_ir(c, d.func_def); - } - - d = next_decl(c, d); - } - // Do optimization passes d = first_decl(c); loop { @@ -224,7 +215,7 @@ func compile(c: *compiler, p: *node) { break; } - if d.func_used && d.func_defined && d.func_def { + if d.func_used && d.func_defined && d.func_ir { ir_optimize(d.func_ir); } @@ -238,7 +229,7 @@ func compile(c: *compiler, p: *node) { break; } - if d.func_used && d.func_defined && d.func_def { + if d.func_used && d.func_defined && d.func_ir { output_ir(c, d); } @@ -273,118 +264,56 @@ func mark_func_used(c: *compiler, d: *decl, v: *decl) { c.used_top = v; } -func mark_expr_used(c: *compiler, d: *decl, n: *node) { +func mark_ir_expr(c: *compiler, d: *decl, op: *irop) { var v: *decl; var kind: int; - if !n { - return; - } - - c.filename = n.filename; - c.lineno = n.lineno; - c.colno = n.colno; - - kind = n.kind; - if kind == N_EXPRLIST { - loop { - if !n { - break; - } - - mark_expr_used(c, d, n.a); - - n = n.b; - } - return; - } else if kind == N_IDENT { - v = find(c, n.s, nil, 0); - if v && v.enum_defined { + if op.kind == IOP_FUNC { + v = find(c, op.s, nil, 0); + if !v || !v.func_defined { return; } - v = find(c, d.name, n.s, 0); - if v && v.var_defined { - return; - } - - v = find(c, n.s, nil, 0); - if v && v.func_defined { - mark_func_used(c, d, v); - return; - } - - cdie(c, "no such variable"); - } else if kind == N_CALL || kind == N_ASSIGN || kind == N_INDEX - || kind == N_LT || kind == N_GT || kind == N_LE - || kind == N_GE || kind == N_EQ || kind == N_NE - || kind == N_BOR || kind == N_BAND || kind == N_ADD - || 
kind == N_SUB || kind == N_MUL || kind == N_DIV - || kind == N_MOD || kind == N_LSH || kind == N_RSH - || kind == N_AND || kind == N_OR || kind == N_XOR { - mark_expr_used(c, d, n.a); - mark_expr_used(c, d, n.b); - return; - } else if kind == N_REF || kind == N_DEREF || kind == N_BNOT - || kind == N_POS || kind == N_NEG || kind == N_NOT - || kind == N_CAST || kind == N_DOT { - mark_expr_used(c, d, n.a); - return; - } else if kind == N_SIZEOF || kind == N_STR || kind == N_NUM - || kind == N_CHAR || kind == N_NIL { - return; - } else { - cdie(c, "not an expression"); + mark_func_used(c, d, v); } -} -func mark_stmt_used(c: *compiler, d: *decl, n: *node) { - var kind: int; + if op.a { + mark_ir_expr(c, d, op.a); + } - if !n { - return; + if op.b { + mark_ir_expr(c, d, op.b); } +} - c.filename = n.filename; - c.lineno = n.lineno; - c.colno = n.colno; +func mark_ir_used(c: *compiler, d: *decl) { + var ic: *irfunc; + var ib: *irblock; + var i: int; + var j: int; - kind = n.kind; - if kind == N_CONDLIST { - loop { - if !n { - break; - } + ic = d.func_ir; - mark_expr_used(c, d, n.a.a); + i = 0; + loop { + if i == ic.blocks_len { + break; + } - mark_stmt_used(c, d, n.a.b); + ib = ic.blocks[i]; - n = n.b; - } - return; - } else if kind == N_STMTLIST { + j = 0; loop { - if !n { + if j == ib.ops_len { break; } - mark_stmt_used(c, d, n.a); + mark_ir_expr(c, d, ib.ops[j]); - n = n.b; + j = j + 1; } - return; - } else if kind == N_LOOP { - mark_stmt_used(c, d, n.a); - return; - } else if kind == N_RETURN { - mark_expr_used(c, d, n.a); - return; - } else if kind == N_BREAK || kind == N_CONTINUE || kind == N_LABEL || kind == N_GOTO { - return; - } else if kind != N_VARDECL { - mark_expr_used(c, d, n); - return; + + i = i + 1; } } @@ -398,8 +327,8 @@ func check_usage(c: *compiler) { } c.used_top = d.used_next; - if d.func_def { - mark_stmt_used(c, d, d.func_def.b); + if d.func_ir { + mark_ir_used(c, d); } } } @@ -425,33 +354,28 @@ func defextern(c: *compiler, n: *node): *decl { 
d.func_defined = 1; d.func_type = t; - d.func_decl = n; return d; } -func defun(c: *compiler, n: *node) { +func defun(c: *compiler, funcdef: *node) { var d: *decl; var name: *byte; var v: *decl; var t: *type; - var offset: int; - - d = defextern(c, n.a); - - d.func_def = n; + var n: *node; - n = n.a.b.a; + d = defextern(c, funcdef.a); - offset = 16; + n = funcdef.a.b.a; loop { if (!n) { break; } - c.filename = n.a.filename; - c.lineno = n.a.lineno; - c.colno = n.a.colno; + c.filename = n.filename; + c.lineno = n.lineno; + c.colno = n.colno; c.s.filename = n.filename; c.s.lineno = n.lineno; @@ -465,17 +389,11 @@ func defun(c: *compiler, n: *node) { v.var_defined = 1; v.var_type = t; - v.var_offset = offset; - v.var_def = n.a; - offset = offset + 8; n = n.b; } - // Hoist locals - offset = hoist_locals(c, d, d.func_def.b, 0); - - d.func_preamble = offset; + hoist_locals(c, d, funcdef.b); } func defstruct(c: *compiler, n: *node) { @@ -528,8 +446,34 @@ func defunion(c: *compiler, n: *node) { d.struct_type = prototype(c, n.a); } -func defenum(c: *compiler, n: *node) { +func define_enum_tag(c: *compiler, name: *byte, value: int) { + var d: *decl; + + d = find(c, name, nil, 1); + + if d.enum_defined { + cdie(c, "duplicate enum"); + } + + d.enum_defined = 1; + d.enum_value = value; +} + +func define_ir_func(c: *compiler, ic: *irfunc, t: *type) { var d: *decl; + + d = find(c, ic.name, nil, 1); + + if d.func_defined { + cdie(c, "duplicate func"); + } + + d.func_defined = 1; + d.func_type = t; + d.func_ir = ic; +} + +func defenum(c: *compiler, n: *node) { var i: int; var name: *byte; @@ -545,19 +489,12 @@ func defenum(c: *compiler, n: *node) { c.colno = n.a.colno; name = n.a.a.s; - d = find(c, name, nil, 1); - - if (d.enum_defined) { - cdie(c, "duplicate enum"); - } if (n.a.b) { i = n.a.b.n; } - d.enum_defined = 1; - d.enum_value = i; - d.enum_def = n; + define_enum_tag(c, name, i); i = i + 1; n = n.b; @@ -605,7 +542,6 @@ func layout_struct(c: *compiler, d: *decl) { 
md.member_defined = 1; md.member_type = t; md.member_offset = offset; - md.member_def = m; offset = offset + type_sizeof(c, t); @@ -658,7 +594,6 @@ func layout_union(c: *compiler, d: *decl) { md.member_defined = 1; md.member_type = t; md.member_offset = 0; - md.member_def = m; member_size = type_sizeof(c, t); if member_size > size { @@ -1260,47 +1195,40 @@ func typecheck_stmt(c: *compiler, d: *decl, n: *node) { } } -func typecheck_func(c: *compiler, d: *decl) { - if (!d.func_def) { - return; - } - - typecheck_stmt(c, d, d.func_def.b); -} - -func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { +func hoist_locals(c: *compiler, d: *decl, n: *node) { var kind: int; var name: *byte; var t: *type; var v: *decl; if (!n) { - return offset; + return; } kind = n.kind; if (kind == N_CONDLIST) { loop { if (!n) { - return offset; + return; } - hoist_locals(c, d, n.a.b, offset); + hoist_locals(c, d, n.a.b); n = n.b; } } else if (kind == N_STMTLIST) { loop { if (!n) { - return offset; + return; } - offset = hoist_locals(c, d, n.a, offset); + hoist_locals(c, d, n.a); n = n.b; } } else if (kind == N_LOOP) { - return hoist_locals(c, d, n.a, offset); + hoist_locals(c, d, n.a); + return; } else if (kind == N_LABEL) { name = n.a.s; v = find(c, d.name, name, 1); @@ -1310,9 +1238,9 @@ func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { } v.goto_defined = 1; - return offset; + return; } else if (kind != N_VARDECL) { - return offset; + return; } name = n.a.s; @@ -1328,12 +1256,6 @@ func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int { v.var_type = t; v.var_defined = 1; - - offset = offset + type_sizeof(c, t); - - v.var_offset = -offset; - - return offset; } func gather_include(c: *compiler, filename: *byte, slen: *int): *byte { @@ -2210,7 +2132,6 @@ func main(argc: int, argv: **byte, envp: **byte) { var input: *name_node; var tmp: *name_node; var link: **name_node; - var pout_filename: *byte; link = &input; @@ -2272,16 +2193,6 @@ 
func main(argc: int, argv: **byte, envp: **byte) { continue; } - if (!strcmp(argv[i], "-P")) { - i = i + 1; - if (i >= argc) { - die("invalid -P at end of argument list"); - } - pout_filename = argv[i]; - i = i + 1; - continue; - } - if (argv[i][0] == '-' as byte) { die("invalid argument"); } @@ -2295,10 +2206,6 @@ func main(argc: int, argv: **byte, envp: **byte) { i = i + 1; } - if pout_filename { - peg_open_output(c.p.pc, pout_filename); - } - tmp = input; loop { if !tmp { diff --git a/cc3.om b/cc3.om @@ -133,6 +133,9 @@ ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp <- ( [ \r\n\t] / '//' (![\r\n] .)* )* +// NOTE: This makes the language context sensitive by use of [ and ' <- +// But, we can fix this by tweaking the peg grammar. + peg_grammar <- '%{' sp (peg_rule / peg_prefix)+ '%}' sp peg_rule <- peg_identifier sp '<-' sp peg_pattern peg_prefix <- '%prefix' sp peg_identifier sp diff --git a/decl.om b/decl.om @@ -11,10 +11,7 @@ struct decl { func_defined: int; func_type: *type; func_label: *label; - func_def: *node; - func_decl: *node; func_used: int; - func_preamble: int; func_ir: *irfunc; struct_defined: int; @@ -26,16 +23,12 @@ struct decl { member_defined: int; member_type: *type; member_offset: int; - member_def: *node; enum_defined: int; enum_value: int; - enum_def: *node; var_defined: int; var_type: *type; - var_offset: int; - var_def: *node; goto_defined: int; goto_label: *label; @@ -97,27 +90,21 @@ func find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl { d.func_defined = 0; d.func_type = nil; d.func_label = mklabel(c.s); - d.func_def = nil; d.func_used = 0; d.struct_defined = 0; d.struct_size = 0; d.struct_layout_done = 0; - d.struct_def = nil; d.member_defined = 0; d.member_type = nil; d.member_offset = 0; - d.member_def = nil; d.enum_defined = 0; d.enum_value = 0; - d.enum_def = nil; d.var_defined = 0; d.var_type = nil; - d.var_offset = 0; - d.var_def = nil; d.goto_defined = 0; d.goto_label = mklabel(c.s); diff --git a/ir.om b/ir.om 
@@ -100,6 +100,9 @@ struct irfunc { s: *assembler; a: *alloc; name: *byte; + filename: *byte; + lineno: int; + colno: int; loopctx: *irloopctx; blocks: **irblock; blocks_len: int; @@ -180,6 +183,42 @@ func mkirconst(ic: *irfunc, n: int): *irop { return o; } +func mkirarg(ic: *irfunc, n: int, a: *irop): *irop { + var o: *irop; + + o = mkirop(ic, IOP_ARG, a, nil); + + o.n = n; + + return o; +} + +func mkirvarop(ic: *irfunc, name: *byte): *irop { + var iv: *irvar; + var o: *irop; + + iv = *irfind_var(ic, name); + if !iv { + cdie(ic.c, "no such variable"); + } + + o = mkirop(ic, IOP_VAR, nil, nil); + + o.n = iv.n; + + return o; +} + +func mkirretval(ic: *irfunc, a: *irop, t: *type): *irop { + var o: *irop; + + o = mkirop(ic, IOP_RETVAL, a, nil); + + o.t = t; + + return o; +} + func mkirstr(ic: *irfunc, s: *byte, slen: int): *irop { var o: *irop; @@ -191,7 +230,7 @@ func mkirstr(ic: *irfunc, s: *byte, slen: int): *irop { return o; } -func mkirfunc(ic: *irfunc, name: *byte): *irop { +func mkirfuncref(ic: *irfunc, name: *byte): *irop { var o: *irop; o = mkirop(ic, IOP_FUNC, nil, nil); @@ -302,7 +341,33 @@ func iraddvar(ic: *irfunc, name: *byte, t: *type) { *iv = mkirvar(ic, name, t); } -func ircall(ic: *irfunc, n: *node): *irop { +func ircall(ic: *irfunc, fp: *irop, nargs: int) { + var o: *irop; + var cur: *irblock; + var next: *irblock; + + // Emit the call + o = mkirop(ic, IOP_CALL, fp, nil); + o.n = nargs; + iraddop(ic, o); + + // Link the return path + next = mkirblock(ic); + + cur = ic.cur; + if cur { + if cur.done { + cdie(ic.c, "block already done"); + } + + cur.done = 1; + cur.out = next; + } + + ic.cur = next; +} + +func call_to_ir(ic: *irfunc, n: *node): *irop { var o: *irop; var a: *irop; var b: *irop; @@ -387,9 +452,7 @@ func ircall(ic: *irfunc, n: *node): *irop { break; } - o = mkirop(ic, IOP_ARG, tmp[i], nil); - o.n = i; - o.t = arg.a.t; + o = mkirarg(ic, i, tmp[i]); iraddop(ic, o); arg = arg.b; @@ -400,29 +463,10 @@ func ircall(ic: *irfunc, n: *node): *irop 
{ // Add a temporary for the return value ret = mkirtmp(ic, n.t); - o = mkirop(ic, IOP_RETVAL, ret, nil); - o.t = n.t; - iraddop(ic, o); - - // Emit the call - o = mkirop(ic, IOP_CALL, fp, nil); - o.n = count; + o = mkirretval(ic, ret, n.t); iraddop(ic, o); - // Link the return path - next = mkirblock(ic); - - cur = ic.cur; - if cur { - if cur.done { - cdie(ic.c, "block already done"); - } - - cur.done = 1; - cur.out = next; - } - - ic.cur = next; + ircall(ic, fp, count); // Return an expression that contains the return value return ret; @@ -473,7 +517,7 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop { o = mkirstr(ic, n.s, strlen(n.s)); return o; } else if kind == N_CALL { - o = ircall(ic, n); + o = call_to_ir(ic, n); return o; } else if kind == N_IDENT { v = find(ic.c, n.s, nil, 0); @@ -494,7 +538,7 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop { // function if v && v.func_defined { - o = mkirfunc(ic, n.s); + o = mkirfuncref(ic, n.s); return o; } @@ -1103,14 +1147,8 @@ func locals_to_ir(ic: *irfunc, n: *node) { } } -func func_to_ir(c: *compiler, n: *node): *irfunc { +func mkirfunc(c: *compiler, name: *byte): *irfunc { var ic: *irfunc; - var value: *irop; - var t: *type; - - if !n { - return nil; - } ic = alloc(c.a, sizeof(*ic)) as *irfunc; @@ -1119,10 +1157,27 @@ func func_to_ir(c: *compiler, n: *node): *irfunc { ic.s = c.s; mkirblock(ic); - ic.cur = ic.blocks[0]; - ic.name = n.a.a.s; + ic.name = name; + + return ic; +} + +func func_to_ir(c: *compiler, n: *node): *irfunc { + var ic: *irfunc; + var value: *irop; + var t: *type; + + if !n { + return nil; + } + + ic = mkirfunc(c, n.a.a.s); + + ic.filename = n.filename; + ic.lineno = n.lineno; + ic.colno = n.colno; args_to_ir(ic, n.a.b.a); @@ -1571,11 +1626,11 @@ func output_ir(c: *compiler, d: *decl) { ic = d.func_ir; - ic.c.filename = d.func_def.filename; - ic.c.lineno = d.func_def.lineno; - ic.c.colno = d.func_def.colno; - ic.s.filename = d.func_def.filename; - ic.s.lineno = d.func_def.lineno; + ic.c.filename 
= ic.filename; + ic.c.lineno = ic.lineno; + ic.c.colno = ic.colno; + ic.s.filename = ic.filename; + ic.s.lineno = ic.lineno; fixup_label(c.s, d.func_label); add_symbol(c.s, d.name, d.func_label); diff --git a/ircout.om b/ircout.om @@ -21,11 +21,11 @@ func ircout(c: *compiler) { // Real entry point to translated entry point fputs(c.cout, "int main(int argc, char **argv) {\n"); - fputs(c.cout, "\tmy__start((u)argc, (u)argv, 0);\n"); + fputs(c.cout, "\tz_start((u)argc, (u)argv, 0);\n"); fputs(c.cout, "\treturn 0;\n"); fputs(c.cout, "}\n"); - fputs(c.cout, "s syscall(s nr, ...);\n"); - fputs(c.cout, "u my_syscall(u nr, u a, u b, u c, u d, u e, u f) {\n"); + fputs(c.cout, "s syscall();\n"); + fputs(c.cout, "u zsyscall(u nr, u a, u b, u c, u d, u e, u f) {\n"); fputs(c.cout, "\treturn (u)syscall((s)nr, (s)a, (s)b, (s)c, (s)d, (s)e, (s)f);\n"); fputs(c.cout, "}\n"); @@ -50,7 +50,7 @@ func ircdecl(c: *compiler, ic: *irfunc) { var i: int; var iv: *irvar; - fputs(c.cout, "u my_"); + fputs(c.cout, "u z"); fputs(c.cout, ic.name); fputs(c.cout, "("); @@ -62,7 +62,7 @@ func ircdecl(c: *compiler, ic: *irfunc) { iv = ic.vars[i]; - fputs(c.cout, "u v_"); + fputs(c.cout, "u v"); fputs(c.cout, iv.name); i = i + 1; @@ -83,8 +83,9 @@ func ircforward(c: *compiler, d: *decl) { return; } - ircdecl(c, ic); - fputs(c.cout, ";\n"); + fputs(c.cout, "u z"); + fputs(c.cout, ic.name); + fputs(c.cout, "();\n"); } func ircdefine(c: *compiler, d: *decl) { @@ -136,7 +137,7 @@ func ircdefine(c: *compiler, d: *decl) { fputs(c.cout, "\tu "); if iv.name { - fputs(c.cout, "v_"); + fputs(c.cout, "v"); fputs(c.cout, iv.name); } else { fputs(c.cout, "v"); @@ -259,6 +260,7 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { var i: int; var op: *irop; var kind: int; + var didlabel: int; loop { if i + 1 == ib.ops_len { @@ -275,10 +277,11 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { ircline(c, op); - if i == 0 { + if !didlabel { fputs(c.cout, "b"); fputd(c.cout, ib.n); fputs(c.cout, ":"); 
+ didlabel = 1; } fputs(c.cout, "\t"); @@ -294,20 +297,22 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { if kind == IOP_CALL { ircline(c, op); - if i == 0 { + if !didlabel { fputs(c.cout, "b"); fputd(c.cout, ib.n); fputs(c.cout, ":"); + didlabel = 1; } irccall(c, ic, ib, op); } else if kind == IOP_JUMP { ircline(c, op); - if i == 0 { + if !didlabel { fputs(c.cout, "b"); fputd(c.cout, ib.n); fputs(c.cout, ":"); + didlabel = 1; } fputs(c.cout, "\tgoto b"); @@ -316,15 +321,16 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { } else if kind == IOP_BRANCH { ircline(c, op); - if i == 0 { + if !didlabel { fputs(c.cout, "b"); fputd(c.cout, ib.n); fputs(c.cout, ":"); + didlabel = 1; } - fputs(c.cout, "\tif (!("); - ircop(c, ic, ib, op.a); - fputs(c.cout, ")) { goto b"); + fputs(c.cout, "\tif ("); + ircop_neg(c, ic, ib, op.a); + fputs(c.cout, ") { goto b"); fputd(c.cout, ib.alt.n); fputs(c.cout, "; }\n"); @@ -337,10 +343,11 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { } else if kind == IOP_RETURN { ircline(c, op); - if i == 0 { + if !didlabel { fputs(c.cout, "b"); fputd(c.cout, ib.n); fputs(c.cout, ":"); + didlabel = 1; } fputs(c.cout, "\treturn "); @@ -392,6 +399,53 @@ func ircstr(c: *compiler, s: *byte, n: int) { fputs(c.cout, "\""); } +func ircop_neg(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { + var kind: int; + + kind = op.kind; + if kind == IOP_EQ { + fputs(c.cout, "("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") != ("); + ircop(c, ic, ib, op.b); + fputs(c.cout, ")"); + } else if kind == IOP_NE { + fputs(c.cout, "("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") == ("); + ircop(c, ic, ib, op.b); + fputs(c.cout, ")"); + } else if kind == IOP_GT { + fputs(c.cout, "(s)("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") <= (s)("); + ircop(c, ic, ib, op.b); + fputs(c.cout, ")"); + } else if kind == IOP_GE { + fputs(c.cout, "(s)("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") < (s)("); + ircop(c, ic, ib, op.b); + fputs(c.cout, 
")"); + } else if kind == IOP_LT { + fputs(c.cout, "(s)("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") >= (s)("); + ircop(c, ic, ib, op.b); + fputs(c.cout, ")"); + } else if kind == IOP_LE { + fputs(c.cout, "(s)("); + ircop(c, ic, ib, op.a); + fputs(c.cout, ") > (s)("); + ircop(c, ic, ib, op.b); + fputs(c.cout, ")"); + } else { + fputs(c.cout, "!("); + ircop(c, ic, ib, op); + fputs(c.cout, ")"); + } +} + func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { var kind: int; var iv: *irvar; @@ -420,14 +474,14 @@ func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { } if iv.name { - fputs(c.cout, "v_"); + fputs(c.cout, "v"); fputs(c.cout, iv.name); } else { fputs(c.cout, "v"); fputd(c.cout, op.n); } } else if kind == IOP_FUNC { - fputs(c.cout, "(u)my_"); + fputs(c.cout, "(u)z"); fputs(c.cout, op.s); } else if kind == IOP_CONST { fputd(c.cout, op.n); @@ -531,17 +585,17 @@ func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { ircop(c, ic, ib, op.b); fputs(c.cout, ")"); } else if kind == IOP_EQ { - fputs(c.cout, "(u)(((s)("); + fputs(c.cout, "(u)(("); ircop(c, ic, ib, op.a); - fputs(c.cout, ")) == ((s)("); + fputs(c.cout, ") == ("); ircop(c, ic, ib, op.b); - fputs(c.cout, ")))"); + fputs(c.cout, "))"); } else if kind == IOP_NE { - fputs(c.cout, "(u)(((s)("); + fputs(c.cout, "(u)(("); ircop(c, ic, ib, op.a); - fputs(c.cout, ")) != ((s)("); + fputs(c.cout, ") != ("); ircop(c, ic, ib, op.b); - fputs(c.cout, ")))"); + fputs(c.cout, "))"); } else if kind == IOP_GT { fputs(c.cout, "(u)(((s)("); ircop(c, ic, ib, op.a); @@ -580,7 +634,7 @@ func irccall(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { ircop(c, ic, ib, op.a); fputs(c.cout, "))("); } else { - fputs(c.cout, "my_"); + fputs(c.cout, "z"); fputs(c.cout, op.a.s); fputs(c.cout, "("); } diff --git a/parse2.om b/parse2.om @@ -5,16 +5,16 @@ struct parser { err: *file; } -func setup_parser(a: *alloc, err: *file): *parser { +func setup_parser(cc: *compiler, err: *file): *parser { var c: 
*parser; - c = alloc(a, sizeof(*c)) as *parser; + c = alloc(cc.a, sizeof(*c)) as *parser; - c.a = a; + c.a = cc.a; - c.p = peg_new("", "", 0, a, peg_P_grammar, P_tag_to_str); + c.p = peg_new("", "", 0, cc.a, peg_P_grammar, P_tag_to_str); - c.pc = setup_peg(a); + c.pc = setup_peg(cc); c.err = err; diff --git a/peg.om b/peg.om @@ -3,9 +3,10 @@ struct peg_compiler { a: *alloc; p: *peg; - out: *file; + cc: *compiler; scratch: *byte; prefix: *byte; + prefix_len: int; } enum { @@ -72,36 +73,42 @@ func decode_count(n: *peg_node): int { } } -func translate_literal(c: *peg_compiler, n: *peg_node) { - var i: int; - var len: int; - var ch: int; - var hex: *byte; - - hex = "0123456789abcdef"; - - fputs(c.out, " ok = literal(c, \""); - i = 1; - len = n.len - 1; - loop { - if i == len { - break; - } - - ch = n.str[i] as int; - - if ch < 32 || ch > 127 || ch == '\\' || ch == '"' { - fputc(c.out, '\\'); - fputc(c.out, 'x'); - fputc(c.out, hex[ch >> 4] as int); - fputc(c.out, hex[ch & 15] as int); - } else { - fputc(c.out, ch); - } - - i = i + 1; - } - fputs(c.out, "\");\n"); +func translate_literal(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) { + var a: *irop; + var b: *irop; + var o: *irop; + var ret: *irop; + var t: *type; + var next: *irblock; + var s: *byte; + + s = alloc(c.a, n.len - 1); + memcpy(s, &n.str[1], n.len - 2); + s[n.len - 1] = 0 as byte; + + t = mktype0(c.cc, TY_INT); + + next = mkirblock(ic); + + // ret = literal(c, ${n.str}); + ret = mkirtmp(ic, t); + o = mkirretval(ic, ret, t); + iraddop(ic, o); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirstr(ic, s, n.len - 2); + o = mkirarg(ic, 1, a); + iraddop(ic, o); + a = mkirfuncref(ic, "literal"); + ircall(ic, a, 2); + + // if ret == 0 { goto fail; } + a = ret; + b = mkirconst(ic, 0); + o = mkirop(ic, IOP_NE, a, b); + + irbranch(ic, o, fail, next); } func parse_escape(s: *byte, i: *int, n: int): int { @@ -153,14 +160,22 @@ func parse_escape(s: *byte, i: *int, n: int): int 
{ } } -func translate_charset(c: *peg_compiler, n: *peg_node) { +func translate_charset(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) { var i: int; + var j: int; var len: int; var ch: int; var a: int; var b: int; var hex: *byte; var count: int; + var oa: *irop; + var ob: *irop; + var o: *irop; + var ret: *irop; + var t: *type; + var next: *irblock; + var s: *byte; hex = "0123456789abcdef"; @@ -222,84 +237,173 @@ func translate_charset(c: *peg_compiler, n: *peg_node) { i = i + 1; } - if count == 0 { - fputs(c.out, " fail(c);\n"); - fputs(c.out, " ok = 0;\n"); - return; - } else if count >= 255 { - fputs(c.out, " ok = any(c);\n"); - return; - } else if count == 1 { - fputs(c.out, " ok = literal(c, \""); - } else { - fputs(c.out, " ok = charset(c, \""); - } - i = 1; + j = 0; loop { if i == 256 { break; } if c.scratch[i] { - if ch < 32 || ch > 127 || ch == '\\' || ch == '"' { - fputc(c.out, '\\'); - fputc(c.out, 'x'); - fputc(c.out, hex[i >> 4] as int); - fputc(c.out, hex[i & 15] as int); - } else { - fputc(c.out, i); - } + c.scratch[j] = i as byte; + j = j + 1; } i = i + 1; } - fputs(c.out, "\");\n"); + + c.scratch[j] = 0 as byte; + + s = alloc(c.a, j + 1); + memcpy(s, c.scratch, j); + s[j] = 0 as byte; + + t = mktype0(c.cc, TY_INT); + + next = mkirblock(ic); + + // ret = charset(c, ${charset}); + ret = mkirtmp(ic, t); + o = mkirretval(ic, ret, t); + iraddop(ic, o); + oa = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, oa); + iraddop(ic, o); + oa = mkirstr(ic, s, j); + o = mkirarg(ic, 1, oa); + iraddop(ic, o); + oa = mkirfuncref(ic, "charset"); + ircall(ic, oa, 2); + + // if ret == 0 { goto fail; } + oa = ret; + ob = mkirconst(ic, 0); + o = mkirop(ic, IOP_NE, oa, ob); + + irbranch(ic, o, fail, next); +} + +func translate_call(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) { + var prefix: *byte; + var prefix_len: int; + var result: *byte; + var len: int; + var a: *irop; + var b: *irop; + var o: *irop; + var ret: *irop; + var t: *type; + var next: 
*irblock; + + prefix = c.prefix; + prefix_len = strlen(prefix); + + len = prefix_len + n.len + 4; + + result = alloc(c.a, len + 1); + + memcpy(result, "peg_", 4); + memcpy(&result[4], prefix, prefix_len); + memcpy(&result[4 + prefix_len], n.str, n.len); + result[len] = 0 as byte; + + next = mkirblock(ic); + + t = mktype0(c.cc, TY_INT); + + ret = mkirtmp(ic, t); + + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + + o = mkirretval(ic, ret, t); + iraddop(ic, o); + + a = mkirfuncref(ic, result); + ircall(ic, a, 1); + + a = ret; + b = mkirconst(ic, 0); + o = mkirop(ic, IOP_NE, a, b); + irbranch(ic, o, fail, next); } -func translate_pattern(c: *peg_compiler, n: *peg_node) { +func translate_pattern(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) { var count: int; var look: int; var d: *peg_node; + var a: *irop; + var b: *irop; + var o: *irop; + var ret: *irop; + var t: *type; + var next: *irblock; + var out: *irblock; loop { - if n.tag == P_peg_pattern { + if n.tag == P_peg_pattern { // A sequence of alternatives d = n.child; if !d.next { - translate_pattern(c, d); + translate_pattern(c, ic, fail, d); } else { - fputs(c.out, " choice(c);\n"); - translate_pattern(c, d); - d = d.next; + next = mkirblock(ic); + out = mkirblock(ic); + + irjump(ic, next, next); + loop { if !d { break; } - fputs(c.out, " if !ok { choice(c);\n"); - translate_pattern(c, d); - fputs(c.out, " }\n"); + next = mkirblock(ic); + + // next: choice(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "choice"); + ircall(ic, a, 1); + // if !pattern { goto next; } + translate_pattern(c, ic, next, d); + + // goto out; + irjump(ic, out, next); d = d.next; } - fputs(c.out, " if ok { commit(c); } else { fail(c); }\n"); + + // next: fail(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "fail"); + ircall(ic, a, 1); + // goto fail; + irjump(ic, fail, out); + + // out: commit(c); + a = 
mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "commit"); + ircall(ic, a, 1); } - } else if n.tag == P_peg_alternative { + } else if n.tag == P_peg_alternative { // A sequence of concatenations d = n.child; - translate_pattern(c, d); + translate_pattern(c, ic, fail, d); d = d.next; loop { if !d { break; } - fputs(c.out, " if ok {\n"); - translate_pattern(c, d); - fputs(c.out, " }\n"); + translate_pattern(c, ic, fail, d); d = d.next; } - } else if n.tag == P_peg_lookahead { + } else if n.tag == P_peg_lookahead { // An and or a not pattern that does not consume look = decode_look(n); d = n.child; if d.tag == P_peg_lookop { @@ -307,59 +411,158 @@ func translate_pattern(c: *peg_compiler, n: *peg_node) { } if look == LOOK_AND { - fputs(c.out, " choice(c);\n"); - translate_pattern(c, d); - fputs(c.out, " fail(c);\n"); + out = mkirblock(ic); + next = mkirblock(ic); + + // choice(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "choice"); + ircall(ic, a, 1); + + // if !pattern { goto next; } + translate_pattern(c, ic, next, d); + + // fail(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "fail"); + ircall(ic, a, 1); + + // goto out; + irjump(ic, out, next); + + // next: fail(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "fail"); + ircall(ic, a, 1); + + // goto fail; + irjump(ic, fail, out); + + // out: } else if look == LOOK_NOT { - fputs(c.out, " choice(c);\n"); - translate_pattern(c, d); - fputs(c.out, " if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }\n"); + out = mkirblock(ic); + + // choice(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "choice"); + ircall(ic, a, 1); + + // if !pattern { goto out; } + translate_pattern(c, ic, out, d); + + // fail(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, 
"fail"); + ircall(ic, a, 1); + + // fail(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "fail"); + ircall(ic, a, 1); + + // goto fail; + irjump(ic, fail, out); + + // out: } else if look == LOOK_NORMAL { - translate_pattern(c, d); + translate_pattern(c, ic, fail, d); } else { die("invalid lookop"); } - } else if n.tag == P_peg_suffix { + } else if n.tag == P_peg_suffix { // Repetition operator count = decode_count(n); if count == ZERO_OR_ONE { - fputs(c.out, " choice(c);\n"); - translate_pattern(c, n.child); - fputs(c.out, " if ok { commit(c); } else { ok = 1; }\n"); + // choice(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "choice"); + ircall(ic, a, 1); + + out = mkirblock(ic); + + // if !pattern { goto out; } + translate_pattern(c, ic, out, n.child); + + // commit(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "commit"); + ircall(ic, a, 1); + + // out: + irjump(ic, out, out); } else if count == EXACTLY_ONE { - translate_pattern(c, n.child); + translate_pattern(c, ic, fail, n.child); } else if count == ZERO_OR_MORE { - fputs(c.out, " loop {\n"); - fputs(c.out, " choice(c);\n"); - translate_pattern(c, n.child); - fputs(c.out, " if !ok { ok = 1; break; }\n"); - fputs(c.out, " commit(c);\n"); - fputs(c.out, " }\n"); +zero_or_more: next = mkirblock(ic); + out = mkirblock(ic); + + // next: + irjump(ic, next, next); + + // choice(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "choice"); + ircall(ic, a, 1); + + // if !pattern { goto out; } + translate_pattern(c, ic, out, n.child); + + // commit(c); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "commit"); + ircall(ic, a, 1); + + // goto next; out: + irjump(ic, next, out); } else if count == ONE_OR_MORE { - translate_pattern(c, n.child); - fputs(c.out, " if ok {\n"); - fputs(c.out, " loop 
{\n"); - fputs(c.out, " choice(c);\n"); - translate_pattern(c, n.child); - fputs(c.out, " if !ok { ok = 1; break; }\n"); - fputs(c.out, " commit(c);\n"); - fputs(c.out, " }\n"); - fputs(c.out, " }\n"); + translate_pattern(c, ic, fail, n.child); + goto zero_or_more; } else { die("invalid countop"); } } else if n.tag == P_peg_primary { - translate_pattern(c, n.child); + translate_pattern(c, ic, fail, n.child); } else if n.tag == P_peg_any { - fputs(c.out, " ok = any(c);\n"); + // if !any(c) { goto fail; } + next = mkirblock(ic); + t = mktype0(c.cc, TY_INT); + ret = mkirtmp(ic, t); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + o = mkirretval(ic, ret, t); + iraddop(ic, o); + a = mkirfuncref(ic, "any"); + ircall(ic, a, 1); + a = ret; + b = mkirconst(ic, 0); + o = mkirop(ic, IOP_NE, a, b); + irbranch(ic, o, fail, next); } else if n.tag == P_peg_literal { - translate_literal(c, n); + translate_literal(c, ic, fail, n); } else if n.tag == P_peg_class { - translate_charset(c, n); + translate_charset(c, ic, fail, n); } else if n.tag == P_peg_call { - fputs(c.out, " ok = peg_"); - fputs(c.out, c.prefix); - fputb(c.out, n.child.str, n.child.len); - fputs(c.out, "(c);\n"); + translate_call(c, ic, fail, n); } else if n.tag == P_sp { n = n.next; continue; @@ -379,69 +582,223 @@ func setprefix(c: *peg_compiler, s: *byte, n: int) { p[n] = 0 as byte; c.prefix = p; + c.prefix_len = n; } -func translate(c: *peg_compiler, n: *peg_node) { - var v: *peg_node; +func translate_rule(c: *peg_compiler, i: int, tag: *byte, v: *peg_node) { + var d: *decl; + var arg1_type: *type; + var args_type: *type; + var ret_type: *type; + var func_type: *type; + var ic: *irfunc; + var next: *irblock; + var fail: *irblock; + var o: *irop; + var a: *irop; + var b: *irop; + var name: *byte; + var tag_len: int; + var name_len: int; + + tag_len = strlen(tag); + name_len = tag_len + 4; + + name = alloc(c.a, name_len + 1); + memcpy(name, "peg_", 4); + memcpy(&name[4], tag, tag_len); + 
name[name_len] = 0 as byte; + + d = find(c.cc, "peg", nil, 1); + + arg1_type = mktype_struct(c.cc, d); + arg1_type = mktype1(c.cc, TY_PTR, arg1_type); + args_type = mktype1(c.cc, TY_ARG, arg1_type); + + ret_type = mktype0(c.cc, TY_INT); + + // func(*peg):int + func_type = mktype2(c.cc, TY_FUNC, ret_type, args_type); + + ic = mkirfunc(c.cc, name); + + iraddarg(ic, "c", arg1_type); + + fail = mkirblock(ic); + + // enter(c, $tag); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirconst(ic, i); + o = mkirarg(ic, 1, a); + iraddop(ic, o); + a = mkirfuncref(ic, "enter"); + ircall(ic, a, 2); + + //if !pattern { goto fail; } + translate_pattern(c, ic, fail, v.child.next); + + // leave(c, $tag); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirconst(ic, i); + o = mkirarg(ic, 1, a); + iraddop(ic, o); + a = mkirfuncref(ic, "leave"); + ircall(ic, a, 2); + + // ok: return 1; + o = mkirconst(ic, 1); + irreturn(ic, o); + + // fail: fail(c); + irjump(ic, fail, fail); + a = mkirvarop(ic, "c"); + o = mkirarg(ic, 0, a); + iraddop(ic, o); + a = mkirfuncref(ic, "fail"); + ircall(ic, a, 1); + + // return 0; + o = mkirconst(ic, 0); + irreturn(ic, o); + + define_ir_func(c.cc, ic, func_type); +} - v = n.child; +func translate_rules(c: *peg_compiler, tags: **byte, nodes: **peg_node, num_rules: int) { + var i: int; + + i = 0; loop { - if !v { + if i == num_rules { break; } - if v.tag == P_peg_prefix { - setprefix(c, v.child.str, v.child.len); - } + translate_rule(c, i, tags[i], nodes[i]); - v = v.next; + i = i + 1; } +} - // Generate tags for each rule - fputs(c.out, "enum {\n"); - v = n.child; +func translate_tag_to_str(c: *peg_compiler, tags: **byte, num_rules: int) { + var name: *byte; + var tag: *byte; + var taglen: int; + var i: int; + var arg1_type: *type; + var args_type: *type; + var ret_type: *type; + var func_type: *type; + var ic: *irfunc; + var o: *irop; + var a: *irop; + var b: *irop; + var this: *irblock; + var next: 
*irblock; + + name = make_tag(c, "tag_to_str", 10); + + arg1_type = mktype0(c.cc, TY_INT); + args_type = mktype1(c.cc, TY_ARG, arg1_type); + + ret_type = mktype0(c.cc, TY_BYTE); + ret_type = mktype1(c.cc, TY_PTR, ret_type); + + // func(int):*byte; + func_type = mktype2(c.cc, TY_FUNC, ret_type, args_type); + + ic = mkirfunc(c.cc, name); + + iraddarg(ic, "tag", arg1_type); + + i = 0; loop { - if !v { + if i == num_rules { break; } - if v.tag == P_peg_rule { - fputs(c.out, " "); - fputs(c.out, c.prefix); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, ",\n"); - } + tag = tags[i]; + taglen = strlen(tag); - v = v.next; + this = mkirblock(ic); + next = mkirblock(ic); + + // `if tag == ,i { return ,tag; } + a = mkirop(ic, IOP_VAR, nil, nil); + a.n = 0; + b = mkirconst(ic, i); + o = mkirop(ic, IOP_EQ, a, b); + irbranch(ic, o, next, this); + o = mkirstr(ic, tag, taglen); + irreturn(ic, o); + + irjump(ic, next, next); + + i = i + 1; } - fputs(c.out, "}\n\n"); - // Generate tag to string - fputs(c.out, "func "); - fputs(c.out, c.prefix); - fputs(c.out, "tag_to_str(tag: int): *byte {\n"); + // return nil + o = mkirconst(ic, 0); + irreturn(ic, o); + + define_ir_func(c.cc, ic, func_type); +} + +func make_tag(c: *peg_compiler, suffix: *byte, suffix_len: int): *byte { + var prefix: *byte; + var prefix_len: int; + var result: *byte; + var len: int; + + prefix = c.prefix; + prefix_len = strlen(prefix); + + len = prefix_len + suffix_len; + + result = alloc(c.a, len + 1); + + memcpy(result, prefix, prefix_len); + memcpy(&result[prefix_len], suffix, suffix_len); + result[len] = 0 as byte; + + return result; +} + +func translate(c: *peg_compiler, n: *peg_node) { + var v: *peg_node; + var i: int; + var num_rules: int; + var tags: **byte; + var nodes: **peg_node; + var tag: *byte; + + // Find the prefix v = n.child; + num_rules = 0; loop { if !v { break; } + if v.tag == P_peg_prefix { + setprefix(c, v.child.str, v.child.len); + } + if v.tag == P_peg_rule { - fputs(c.out, " if tag 
== "); - fputs(c.out, c.prefix); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, " { return \""); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, "\"; }\n"); + num_rules = num_rules + 1; } v = v.next; } - fputs(c.out, " die(\"invalid tag\");\n"); - fputs(c.out, " return nil;\n"); - fputs(c.out, "}\n"); - // Generate parsing functions for each rule + tags = alloc(c.a, num_rules * sizeof(*tags)) as **byte; + nodes = alloc(c.a, num_rules * sizeof(*nodes)) as **peg_node; + + // Generate tags for each rule + i = 0; v = n.child; loop { if !v { @@ -449,63 +806,33 @@ func translate(c: *peg_compiler, n: *peg_node) { } if v.tag == P_peg_rule { - fputs(c.out, "\nfunc peg_"); - fputs(c.out, c.prefix); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, "(c: *peg): int {\n"); - fputs(c.out, " var ok: int;\n"); - fputs(c.out, " enter(c, "); - fputs(c.out, c.prefix); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, ");\n"); - translate_pattern(c, v.child.next); - fputs(c.out, " if ok { leave(c, "); - fputs(c.out, c.prefix); - fputb(c.out, v.child.str, v.child.len); - fputs(c.out, "); } else { fail(c); }\n"); - fputs(c.out, " return ok;\n"); - fputs(c.out, "}\n"); + tag = make_tag(c, v.child.str, v.child.len); + define_enum_tag(c.cc, tag, i); + tags[i] = tag; + nodes[i] = v; + i = i + 1; } v = v.next; } + + // Generate tag to string + translate_tag_to_str(c, tags, num_rules); + + // Generate parsing functions for each rule + translate_rules(c, tags, nodes, num_rules); } -func setup_peg(a: *alloc): *peg_compiler { +func setup_peg(cc: *compiler): *peg_compiler { var c: *peg_compiler; - c = alloc(a, sizeof(*c)) as *peg_compiler; - c.a = a; + c = alloc(cc.a, sizeof(*c)) as *peg_compiler; + c.a = cc.a; c.prefix = ""; - c.scratch = alloc(c.a, 256); + c.scratch = alloc(cc.a, 256); + c.cc = cc; return c; } -func peg_open_output(c: *peg_compiler, filename: *byte) { - var fd: int; - var f: *file; - - unlink(filename); - - fd = open(filename, O_CREAT | 
O_WRONLY, (6 << 6) + (6 << 3) + 6); - if fd < 0 { - die("peg_open_output: failed to open output"); - } - - f = fopen(fd, c.a); - c.out = f; -} - func peg_compile(c: *peg_compiler, pn: *peg_node, err: *file) { - var node: *peg_node; - - if !c.out { - return; - } - - c.p = peg_new(pn.filename, pn.str, pn.len, c.a, peg_P_peg_grammar, P_tag_to_str); - - node = peg_parse(c.p, P_sp, err); - translate(c, node); - - fflush(c.out); + translate(c, pn); }