commit 51b076d69589388ca26d29debfc47f1193a481dd
parent 3cdca44786443b4142a7a98be8476b890a29a03c
Author: erai <erai@omiltem.net>
Date: Sun, 9 Feb 2025 00:28:30 +0000
directly generate ir in peg
Diffstat:
M | bootstrap.sh | | | 7 | ++----- |
M | build.sh | | | 5 | ++--- |
M | cc1.om | | | 275 | ++++++++++++++++++++++++++----------------------------------------------------- |
M | cc3.om | | | 3 | +++ |
M | decl.om | | | 13 | ------------- |
M | ir.om | | | 139 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
M | ircout.om | | | 104 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------- |
M | parse2.om | | | 10 | +++++----- |
M | peg.om | | | 699 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------- |
9 files changed, 792 insertions(+), 463 deletions(-)
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -1,15 +1,12 @@
#!/bin/sh
-set -x
-
BOOTSTRAP="cc0.c"
LIBS="bufio.om lib.om alloc.om syscall.om"
SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om"
# Build the bootstrap compiler from c
-[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c99 ${BOOTSTRAP} -o cc0
-./cc0 cc3.om -P parse3.om
-./cc0 ${LIBS} ${SOURCES} parse3.om -o cc1 -n cc1.lines -G cc1.call
+[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c89 ${BOOTSTRAP} -o cc0
+./cc0 ${LIBS} ${SOURCES} cc3.om -o cc1 -n cc1.lines -G cc1.call
# Double check the bootstrap and self hosting compiler have the same output
./cc1 ${LIBS} ${SOURCES} cc3.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call
diff --git a/build.sh b/build.sh
@@ -3,12 +3,11 @@
LIBS="peglib.om bufio.om lib.om alloc.om syscall.om"
CRYPTO="ed25519.om sha512.om sha256.om chacha20.om poly1305.om"
CC="cc1.om type.om as.om decl.om node.om"
-PEG="peg.om parsepeg.om"
SSHD="chacha20.om poly1305.om sha256.om sha512.om ed25519.om sshd.om"
KERNEL="kernel.om"
SHELL="echo.om cmp.om rm.om ls.om cat.om xxd.om mv.om mkdir.om cpio.om sh.om"
-BIN="echo cmp rm ls cat xxd mv mkdir cpio sh sshd init cc1 build.sh peg.peg cc3.peg"
-ALL="${LIBS} ${CC} ${PEG} ${SSHD} ${KERNEL} ${SHELL} ${BIN}"
+BIN="echo cmp rm ls cat xxd mv mkdir cpio sh sshd init cc1 build.sh cc3.om"
+ALL="${LIBS} ${CC} ${SSHD} ${KERNEL} ${SHELL} ${BIN}"
./cc1 ${LIBS} echo.om -o echo -n echo.lines -G echo.call
./cc1 ${LIBS} cmp.om -o cmp -n cmp.lines -G cmp.call
diff --git a/cc1.om b/cc1.om
@@ -62,7 +62,7 @@ func comp_setup(a: *alloc, err: *file): *compiler {
c.err = err;
- c.p = setup_parser(a, err);
+ c.p = setup_parser(c, err);
c.filename = nil;
c.lineno = 0;
@@ -156,18 +156,23 @@ func compile(c: *compiler, p: *node) {
d = next_decl(c, d);
}
- // Typecheck functions
- d = first_decl(c);
+ // Typecheck functions and translate to ir
+ n = p;
loop {
- if (!d) {
+ if (!n) {
break;
}
- if d.func_defined {
- typecheck_func(c, d);
+ kind = n.a.kind;
+ if (kind == N_FUNC) {
+ d = find(c, n.a.a.a.s, nil, 0);
+ if n.a.b {
+ typecheck_stmt(c, d, n.a.b);
+ }
+ d.func_ir = func_to_ir(c, n.a);
}
- d = next_decl(c, d);
+ n = n.b;
}
// Check usage
@@ -203,20 +208,6 @@ func compile(c: *compiler, p: *node) {
check_usage(c);
- // Translate each function to ir
- d = first_decl(c);
- loop {
- if (!d) {
- break;
- }
-
- if d.func_used && d.func_defined && d.func_def {
- d.func_ir = func_to_ir(c, d.func_def);
- }
-
- d = next_decl(c, d);
- }
-
// Do optimization passes
d = first_decl(c);
loop {
@@ -224,7 +215,7 @@ func compile(c: *compiler, p: *node) {
break;
}
- if d.func_used && d.func_defined && d.func_def {
+ if d.func_used && d.func_defined && d.func_ir {
ir_optimize(d.func_ir);
}
@@ -238,7 +229,7 @@ func compile(c: *compiler, p: *node) {
break;
}
- if d.func_used && d.func_defined && d.func_def {
+ if d.func_used && d.func_defined && d.func_ir {
output_ir(c, d);
}
@@ -273,118 +264,56 @@ func mark_func_used(c: *compiler, d: *decl, v: *decl) {
c.used_top = v;
}
-func mark_expr_used(c: *compiler, d: *decl, n: *node) {
+func mark_ir_expr(c: *compiler, d: *decl, op: *irop) {
var v: *decl;
var kind: int;
- if !n {
- return;
- }
-
- c.filename = n.filename;
- c.lineno = n.lineno;
- c.colno = n.colno;
-
- kind = n.kind;
- if kind == N_EXPRLIST {
- loop {
- if !n {
- break;
- }
-
- mark_expr_used(c, d, n.a);
-
- n = n.b;
- }
- return;
- } else if kind == N_IDENT {
- v = find(c, n.s, nil, 0);
- if v && v.enum_defined {
+ if op.kind == IOP_FUNC {
+ v = find(c, op.s, nil, 0);
+ if !v || !v.func_defined {
return;
}
- v = find(c, d.name, n.s, 0);
- if v && v.var_defined {
- return;
- }
-
- v = find(c, n.s, nil, 0);
- if v && v.func_defined {
- mark_func_used(c, d, v);
- return;
- }
-
- cdie(c, "no such variable");
- } else if kind == N_CALL || kind == N_ASSIGN || kind == N_INDEX
- || kind == N_LT || kind == N_GT || kind == N_LE
- || kind == N_GE || kind == N_EQ || kind == N_NE
- || kind == N_BOR || kind == N_BAND || kind == N_ADD
- || kind == N_SUB || kind == N_MUL || kind == N_DIV
- || kind == N_MOD || kind == N_LSH || kind == N_RSH
- || kind == N_AND || kind == N_OR || kind == N_XOR {
- mark_expr_used(c, d, n.a);
- mark_expr_used(c, d, n.b);
- return;
- } else if kind == N_REF || kind == N_DEREF || kind == N_BNOT
- || kind == N_POS || kind == N_NEG || kind == N_NOT
- || kind == N_CAST || kind == N_DOT {
- mark_expr_used(c, d, n.a);
- return;
- } else if kind == N_SIZEOF || kind == N_STR || kind == N_NUM
- || kind == N_CHAR || kind == N_NIL {
- return;
- } else {
- cdie(c, "not an expression");
+ mark_func_used(c, d, v);
}
-}
-func mark_stmt_used(c: *compiler, d: *decl, n: *node) {
- var kind: int;
+ if op.a {
+ mark_ir_expr(c, d, op.a);
+ }
- if !n {
- return;
+ if op.b {
+ mark_ir_expr(c, d, op.b);
}
+}
- c.filename = n.filename;
- c.lineno = n.lineno;
- c.colno = n.colno;
+func mark_ir_used(c: *compiler, d: *decl) {
+ var ic: *irfunc;
+ var ib: *irblock;
+ var i: int;
+ var j: int;
- kind = n.kind;
- if kind == N_CONDLIST {
- loop {
- if !n {
- break;
- }
+ ic = d.func_ir;
- mark_expr_used(c, d, n.a.a);
+ i = 0;
+ loop {
+ if i == ic.blocks_len {
+ break;
+ }
- mark_stmt_used(c, d, n.a.b);
+ ib = ic.blocks[i];
- n = n.b;
- }
- return;
- } else if kind == N_STMTLIST {
+ j = 0;
loop {
- if !n {
+ if j == ib.ops_len {
break;
}
- mark_stmt_used(c, d, n.a);
+ mark_ir_expr(c, d, ib.ops[j]);
- n = n.b;
+ j = j + 1;
}
- return;
- } else if kind == N_LOOP {
- mark_stmt_used(c, d, n.a);
- return;
- } else if kind == N_RETURN {
- mark_expr_used(c, d, n.a);
- return;
- } else if kind == N_BREAK || kind == N_CONTINUE || kind == N_LABEL || kind == N_GOTO {
- return;
- } else if kind != N_VARDECL {
- mark_expr_used(c, d, n);
- return;
+
+ i = i + 1;
}
}
@@ -398,8 +327,8 @@ func check_usage(c: *compiler) {
}
c.used_top = d.used_next;
- if d.func_def {
- mark_stmt_used(c, d, d.func_def.b);
+ if d.func_ir {
+ mark_ir_used(c, d);
}
}
}
@@ -425,33 +354,28 @@ func defextern(c: *compiler, n: *node): *decl {
d.func_defined = 1;
d.func_type = t;
- d.func_decl = n;
return d;
}
-func defun(c: *compiler, n: *node) {
+func defun(c: *compiler, funcdef: *node) {
var d: *decl;
var name: *byte;
var v: *decl;
var t: *type;
- var offset: int;
-
- d = defextern(c, n.a);
-
- d.func_def = n;
+ var n: *node;
- n = n.a.b.a;
+ d = defextern(c, funcdef.a);
- offset = 16;
+ n = funcdef.a.b.a;
loop {
if (!n) {
break;
}
- c.filename = n.a.filename;
- c.lineno = n.a.lineno;
- c.colno = n.a.colno;
+ c.filename = n.filename;
+ c.lineno = n.lineno;
+ c.colno = n.colno;
c.s.filename = n.filename;
c.s.lineno = n.lineno;
@@ -465,17 +389,11 @@ func defun(c: *compiler, n: *node) {
v.var_defined = 1;
v.var_type = t;
- v.var_offset = offset;
- v.var_def = n.a;
- offset = offset + 8;
n = n.b;
}
- // Hoist locals
- offset = hoist_locals(c, d, d.func_def.b, 0);
-
- d.func_preamble = offset;
+ hoist_locals(c, d, funcdef.b);
}
func defstruct(c: *compiler, n: *node) {
@@ -528,8 +446,34 @@ func defunion(c: *compiler, n: *node) {
d.struct_type = prototype(c, n.a);
}
-func defenum(c: *compiler, n: *node) {
+func define_enum_tag(c: *compiler, name: *byte, value: int) {
+ var d: *decl;
+
+ d = find(c, name, nil, 1);
+
+ if d.enum_defined {
+ cdie(c, "duplicate enum");
+ }
+
+ d.enum_defined = 1;
+ d.enum_value = value;
+}
+
+func define_ir_func(c: *compiler, ic: *irfunc, t: *type) {
var d: *decl;
+
+ d = find(c, ic.name, nil, 1);
+
+ if d.func_defined {
+ cdie(c, "duplicate func");
+ }
+
+ d.func_defined = 1;
+ d.func_type = t;
+ d.func_ir = ic;
+}
+
+func defenum(c: *compiler, n: *node) {
var i: int;
var name: *byte;
@@ -545,19 +489,12 @@ func defenum(c: *compiler, n: *node) {
c.colno = n.a.colno;
name = n.a.a.s;
- d = find(c, name, nil, 1);
-
- if (d.enum_defined) {
- cdie(c, "duplicate enum");
- }
if (n.a.b) {
i = n.a.b.n;
}
- d.enum_defined = 1;
- d.enum_value = i;
- d.enum_def = n;
+ define_enum_tag(c, name, i);
i = i + 1;
n = n.b;
@@ -605,7 +542,6 @@ func layout_struct(c: *compiler, d: *decl) {
md.member_defined = 1;
md.member_type = t;
md.member_offset = offset;
- md.member_def = m;
offset = offset + type_sizeof(c, t);
@@ -658,7 +594,6 @@ func layout_union(c: *compiler, d: *decl) {
md.member_defined = 1;
md.member_type = t;
md.member_offset = 0;
- md.member_def = m;
member_size = type_sizeof(c, t);
if member_size > size {
@@ -1260,47 +1195,40 @@ func typecheck_stmt(c: *compiler, d: *decl, n: *node) {
}
}
-func typecheck_func(c: *compiler, d: *decl) {
- if (!d.func_def) {
- return;
- }
-
- typecheck_stmt(c, d, d.func_def.b);
-}
-
-func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
+func hoist_locals(c: *compiler, d: *decl, n: *node) {
var kind: int;
var name: *byte;
var t: *type;
var v: *decl;
if (!n) {
- return offset;
+ return;
}
kind = n.kind;
if (kind == N_CONDLIST) {
loop {
if (!n) {
- return offset;
+ return;
}
- hoist_locals(c, d, n.a.b, offset);
+ hoist_locals(c, d, n.a.b);
n = n.b;
}
} else if (kind == N_STMTLIST) {
loop {
if (!n) {
- return offset;
+ return;
}
- offset = hoist_locals(c, d, n.a, offset);
+ hoist_locals(c, d, n.a);
n = n.b;
}
} else if (kind == N_LOOP) {
- return hoist_locals(c, d, n.a, offset);
+ hoist_locals(c, d, n.a);
+ return;
} else if (kind == N_LABEL) {
name = n.a.s;
v = find(c, d.name, name, 1);
@@ -1310,9 +1238,9 @@ func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
}
v.goto_defined = 1;
- return offset;
+ return;
} else if (kind != N_VARDECL) {
- return offset;
+ return;
}
name = n.a.s;
@@ -1328,12 +1256,6 @@ func hoist_locals(c: *compiler, d: *decl, n: *node, offset: int): int {
v.var_type = t;
v.var_defined = 1;
-
- offset = offset + type_sizeof(c, t);
-
- v.var_offset = -offset;
-
- return offset;
}
func gather_include(c: *compiler, filename: *byte, slen: *int): *byte {
@@ -2210,7 +2132,6 @@ func main(argc: int, argv: **byte, envp: **byte) {
var input: *name_node;
var tmp: *name_node;
var link: **name_node;
- var pout_filename: *byte;
link = &input;
@@ -2272,16 +2193,6 @@ func main(argc: int, argv: **byte, envp: **byte) {
continue;
}
- if (!strcmp(argv[i], "-P")) {
- i = i + 1;
- if (i >= argc) {
- die("invalid -P at end of argument list");
- }
- pout_filename = argv[i];
- i = i + 1;
- continue;
- }
-
if (argv[i][0] == '-' as byte) {
die("invalid argument");
}
@@ -2295,10 +2206,6 @@ func main(argc: int, argv: **byte, envp: **byte) {
i = i + 1;
}
- if pout_filename {
- peg_open_output(c.p.pc, pout_filename);
- }
-
tmp = input;
loop {
if !tmp {
diff --git a/cc3.om b/cc3.om
@@ -133,6 +133,9 @@ ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]*
sp <- ( [ \r\n\t] / '//' (![\r\n] .)* )*
+// NOTE: This makes the language context sensitive by use of [ and ' <-
+// But, we can fix this by tweaking the peg grammar.
+
peg_grammar <- '%{' sp (peg_rule / peg_prefix)+ '%}' sp
peg_rule <- peg_identifier sp '<-' sp peg_pattern
peg_prefix <- '%prefix' sp peg_identifier sp
diff --git a/decl.om b/decl.om
@@ -11,10 +11,7 @@ struct decl {
func_defined: int;
func_type: *type;
func_label: *label;
- func_def: *node;
- func_decl: *node;
func_used: int;
- func_preamble: int;
func_ir: *irfunc;
struct_defined: int;
@@ -26,16 +23,12 @@ struct decl {
member_defined: int;
member_type: *type;
member_offset: int;
- member_def: *node;
enum_defined: int;
enum_value: int;
- enum_def: *node;
var_defined: int;
var_type: *type;
- var_offset: int;
- var_def: *node;
goto_defined: int;
goto_label: *label;
@@ -97,27 +90,21 @@ func find(c: *compiler, name: *byte, member_name: *byte, make: int): *decl {
d.func_defined = 0;
d.func_type = nil;
d.func_label = mklabel(c.s);
- d.func_def = nil;
d.func_used = 0;
d.struct_defined = 0;
d.struct_size = 0;
d.struct_layout_done = 0;
- d.struct_def = nil;
d.member_defined = 0;
d.member_type = nil;
d.member_offset = 0;
- d.member_def = nil;
d.enum_defined = 0;
d.enum_value = 0;
- d.enum_def = nil;
d.var_defined = 0;
d.var_type = nil;
- d.var_offset = 0;
- d.var_def = nil;
d.goto_defined = 0;
d.goto_label = mklabel(c.s);
diff --git a/ir.om b/ir.om
@@ -100,6 +100,9 @@ struct irfunc {
s: *assembler;
a: *alloc;
name: *byte;
+ filename: *byte;
+ lineno: int;
+ colno: int;
loopctx: *irloopctx;
blocks: **irblock;
blocks_len: int;
@@ -180,6 +183,42 @@ func mkirconst(ic: *irfunc, n: int): *irop {
return o;
}
+func mkirarg(ic: *irfunc, n: int, a: *irop): *irop {
+ var o: *irop;
+
+ o = mkirop(ic, IOP_ARG, a, nil);
+
+ o.n = n;
+
+ return o;
+}
+
+func mkirvarop(ic: *irfunc, name: *byte): *irop {
+ var iv: *irvar;
+ var o: *irop;
+
+ iv = *irfind_var(ic, name);
+ if !iv {
+ cdie(ic.c, "no such variable");
+ }
+
+ o = mkirop(ic, IOP_VAR, nil, nil);
+
+ o.n = iv.n;
+
+ return o;
+}
+
+func mkirretval(ic: *irfunc, a: *irop, t: *type): *irop {
+ var o: *irop;
+
+ o = mkirop(ic, IOP_RETVAL, a, nil);
+
+ o.t = t;
+
+ return o;
+}
+
func mkirstr(ic: *irfunc, s: *byte, slen: int): *irop {
var o: *irop;
@@ -191,7 +230,7 @@ func mkirstr(ic: *irfunc, s: *byte, slen: int): *irop {
return o;
}
-func mkirfunc(ic: *irfunc, name: *byte): *irop {
+func mkirfuncref(ic: *irfunc, name: *byte): *irop {
var o: *irop;
o = mkirop(ic, IOP_FUNC, nil, nil);
@@ -302,7 +341,33 @@ func iraddvar(ic: *irfunc, name: *byte, t: *type) {
*iv = mkirvar(ic, name, t);
}
-func ircall(ic: *irfunc, n: *node): *irop {
+func ircall(ic: *irfunc, fp: *irop, nargs: int) {
+ var o: *irop;
+ var cur: *irblock;
+ var next: *irblock;
+
+ // Emit the call
+ o = mkirop(ic, IOP_CALL, fp, nil);
+ o.n = nargs;
+ iraddop(ic, o);
+
+ // Link the return path
+ next = mkirblock(ic);
+
+ cur = ic.cur;
+ if cur {
+ if cur.done {
+ cdie(ic.c, "block already done");
+ }
+
+ cur.done = 1;
+ cur.out = next;
+ }
+
+ ic.cur = next;
+}
+
+func call_to_ir(ic: *irfunc, n: *node): *irop {
var o: *irop;
var a: *irop;
var b: *irop;
@@ -387,9 +452,7 @@ func ircall(ic: *irfunc, n: *node): *irop {
break;
}
- o = mkirop(ic, IOP_ARG, tmp[i], nil);
- o.n = i;
- o.t = arg.a.t;
+ o = mkirarg(ic, i, tmp[i]);
iraddop(ic, o);
arg = arg.b;
@@ -400,29 +463,10 @@ func ircall(ic: *irfunc, n: *node): *irop {
// Add a temporary for the return value
ret = mkirtmp(ic, n.t);
- o = mkirop(ic, IOP_RETVAL, ret, nil);
- o.t = n.t;
- iraddop(ic, o);
-
- // Emit the call
- o = mkirop(ic, IOP_CALL, fp, nil);
- o.n = count;
+ o = mkirretval(ic, ret, n.t);
iraddop(ic, o);
- // Link the return path
- next = mkirblock(ic);
-
- cur = ic.cur;
- if cur {
- if cur.done {
- cdie(ic.c, "block already done");
- }
-
- cur.done = 1;
- cur.out = next;
- }
-
- ic.cur = next;
+ ircall(ic, fp, count);
// Return an expression that contains the return value
return ret;
@@ -473,7 +517,7 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop {
o = mkirstr(ic, n.s, strlen(n.s));
return o;
} else if kind == N_CALL {
- o = ircall(ic, n);
+ o = call_to_ir(ic, n);
return o;
} else if kind == N_IDENT {
v = find(ic.c, n.s, nil, 0);
@@ -494,7 +538,7 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop {
// function
if v && v.func_defined {
- o = mkirfunc(ic, n.s);
+ o = mkirfuncref(ic, n.s);
return o;
}
@@ -1103,14 +1147,8 @@ func locals_to_ir(ic: *irfunc, n: *node) {
}
}
-func func_to_ir(c: *compiler, n: *node): *irfunc {
+func mkirfunc(c: *compiler, name: *byte): *irfunc {
var ic: *irfunc;
- var value: *irop;
- var t: *type;
-
- if !n {
- return nil;
- }
ic = alloc(c.a, sizeof(*ic)) as *irfunc;
@@ -1119,10 +1157,27 @@ func func_to_ir(c: *compiler, n: *node): *irfunc {
ic.s = c.s;
mkirblock(ic);
-
ic.cur = ic.blocks[0];
- ic.name = n.a.a.s;
+ ic.name = name;
+
+ return ic;
+}
+
+func func_to_ir(c: *compiler, n: *node): *irfunc {
+ var ic: *irfunc;
+ var value: *irop;
+ var t: *type;
+
+ if !n {
+ return nil;
+ }
+
+ ic = mkirfunc(c, n.a.a.s);
+
+ ic.filename = n.filename;
+ ic.lineno = n.lineno;
+ ic.colno = n.colno;
args_to_ir(ic, n.a.b.a);
@@ -1571,11 +1626,11 @@ func output_ir(c: *compiler, d: *decl) {
ic = d.func_ir;
- ic.c.filename = d.func_def.filename;
- ic.c.lineno = d.func_def.lineno;
- ic.c.colno = d.func_def.colno;
- ic.s.filename = d.func_def.filename;
- ic.s.lineno = d.func_def.lineno;
+ ic.c.filename = ic.filename;
+ ic.c.lineno = ic.lineno;
+ ic.c.colno = ic.colno;
+ ic.s.filename = ic.filename;
+ ic.s.lineno = ic.lineno;
fixup_label(c.s, d.func_label);
add_symbol(c.s, d.name, d.func_label);
diff --git a/ircout.om b/ircout.om
@@ -21,11 +21,11 @@ func ircout(c: *compiler) {
// Real entry point to translated entry point
fputs(c.cout, "int main(int argc, char **argv) {\n");
- fputs(c.cout, "\tmy__start((u)argc, (u)argv, 0);\n");
+ fputs(c.cout, "\tz_start((u)argc, (u)argv, 0);\n");
fputs(c.cout, "\treturn 0;\n");
fputs(c.cout, "}\n");
- fputs(c.cout, "s syscall(s nr, ...);\n");
- fputs(c.cout, "u my_syscall(u nr, u a, u b, u c, u d, u e, u f) {\n");
+ fputs(c.cout, "s syscall();\n");
+ fputs(c.cout, "u zsyscall(u nr, u a, u b, u c, u d, u e, u f) {\n");
fputs(c.cout, "\treturn (u)syscall((s)nr, (s)a, (s)b, (s)c, (s)d, (s)e, (s)f);\n");
fputs(c.cout, "}\n");
@@ -50,7 +50,7 @@ func ircdecl(c: *compiler, ic: *irfunc) {
var i: int;
var iv: *irvar;
- fputs(c.cout, "u my_");
+ fputs(c.cout, "u z");
fputs(c.cout, ic.name);
fputs(c.cout, "(");
@@ -62,7 +62,7 @@ func ircdecl(c: *compiler, ic: *irfunc) {
iv = ic.vars[i];
- fputs(c.cout, "u v_");
+ fputs(c.cout, "u v");
fputs(c.cout, iv.name);
i = i + 1;
@@ -83,8 +83,9 @@ func ircforward(c: *compiler, d: *decl) {
return;
}
- ircdecl(c, ic);
- fputs(c.cout, ";\n");
+ fputs(c.cout, "u z");
+ fputs(c.cout, ic.name);
+ fputs(c.cout, "();\n");
}
func ircdefine(c: *compiler, d: *decl) {
@@ -136,7 +137,7 @@ func ircdefine(c: *compiler, d: *decl) {
fputs(c.cout, "\tu ");
if iv.name {
- fputs(c.cout, "v_");
+ fputs(c.cout, "v");
fputs(c.cout, iv.name);
} else {
fputs(c.cout, "v");
@@ -259,6 +260,7 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) {
var i: int;
var op: *irop;
var kind: int;
+ var didlabel: int;
loop {
if i + 1 == ib.ops_len {
@@ -275,10 +277,11 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) {
ircline(c, op);
- if i == 0 {
+ if !didlabel {
fputs(c.cout, "b");
fputd(c.cout, ib.n);
fputs(c.cout, ":");
+ didlabel = 1;
}
fputs(c.cout, "\t");
@@ -294,20 +297,22 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) {
if kind == IOP_CALL {
ircline(c, op);
- if i == 0 {
+ if !didlabel {
fputs(c.cout, "b");
fputd(c.cout, ib.n);
fputs(c.cout, ":");
+ didlabel = 1;
}
irccall(c, ic, ib, op);
} else if kind == IOP_JUMP {
ircline(c, op);
- if i == 0 {
+ if !didlabel {
fputs(c.cout, "b");
fputd(c.cout, ib.n);
fputs(c.cout, ":");
+ didlabel = 1;
}
fputs(c.cout, "\tgoto b");
@@ -316,15 +321,16 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) {
} else if kind == IOP_BRANCH {
ircline(c, op);
- if i == 0 {
+ if !didlabel {
fputs(c.cout, "b");
fputd(c.cout, ib.n);
fputs(c.cout, ":");
+ didlabel = 1;
}
- fputs(c.cout, "\tif (!(");
- ircop(c, ic, ib, op.a);
- fputs(c.cout, ")) { goto b");
+ fputs(c.cout, "\tif (");
+ ircop_neg(c, ic, ib, op.a);
+ fputs(c.cout, ") { goto b");
fputd(c.cout, ib.alt.n);
fputs(c.cout, "; }\n");
@@ -337,10 +343,11 @@ func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) {
} else if kind == IOP_RETURN {
ircline(c, op);
- if i == 0 {
+ if !didlabel {
fputs(c.cout, "b");
fputd(c.cout, ib.n);
fputs(c.cout, ":");
+ didlabel = 1;
}
fputs(c.cout, "\treturn ");
@@ -392,6 +399,53 @@ func ircstr(c: *compiler, s: *byte, n: int) {
fputs(c.cout, "\"");
}
+func ircop_neg(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) {
+ var kind: int;
+
+ kind = op.kind;
+ if kind == IOP_EQ {
+ fputs(c.cout, "(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") != (");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else if kind == IOP_NE {
+ fputs(c.cout, "(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") == (");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else if kind == IOP_GT {
+ fputs(c.cout, "(s)(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") <= (s)(");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else if kind == IOP_GE {
+ fputs(c.cout, "(s)(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") < (s)(");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else if kind == IOP_LT {
+ fputs(c.cout, "(s)(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") >= (s)(");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else if kind == IOP_LE {
+ fputs(c.cout, "(s)(");
+ ircop(c, ic, ib, op.a);
+ fputs(c.cout, ") > (s)(");
+ ircop(c, ic, ib, op.b);
+ fputs(c.cout, ")");
+ } else {
+ fputs(c.cout, "!(");
+ ircop(c, ic, ib, op);
+ fputs(c.cout, ")");
+ }
+}
+
func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) {
var kind: int;
var iv: *irvar;
@@ -420,14 +474,14 @@ func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) {
}
if iv.name {
- fputs(c.cout, "v_");
+ fputs(c.cout, "v");
fputs(c.cout, iv.name);
} else {
fputs(c.cout, "v");
fputd(c.cout, op.n);
}
} else if kind == IOP_FUNC {
- fputs(c.cout, "(u)my_");
+ fputs(c.cout, "(u)z");
fputs(c.cout, op.s);
} else if kind == IOP_CONST {
fputd(c.cout, op.n);
@@ -531,17 +585,17 @@ func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) {
ircop(c, ic, ib, op.b);
fputs(c.cout, ")");
} else if kind == IOP_EQ {
- fputs(c.cout, "(u)(((s)(");
+ fputs(c.cout, "(u)((");
ircop(c, ic, ib, op.a);
- fputs(c.cout, ")) == ((s)(");
+ fputs(c.cout, ") == (");
ircop(c, ic, ib, op.b);
- fputs(c.cout, ")))");
+ fputs(c.cout, "))");
} else if kind == IOP_NE {
- fputs(c.cout, "(u)(((s)(");
+ fputs(c.cout, "(u)((");
ircop(c, ic, ib, op.a);
- fputs(c.cout, ")) != ((s)(");
+ fputs(c.cout, ") != (");
ircop(c, ic, ib, op.b);
- fputs(c.cout, ")))");
+ fputs(c.cout, "))");
} else if kind == IOP_GT {
fputs(c.cout, "(u)(((s)(");
ircop(c, ic, ib, op.a);
@@ -580,7 +634,7 @@ func irccall(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) {
ircop(c, ic, ib, op.a);
fputs(c.cout, "))(");
} else {
- fputs(c.cout, "my_");
+ fputs(c.cout, "z");
fputs(c.cout, op.a.s);
fputs(c.cout, "(");
}
diff --git a/parse2.om b/parse2.om
@@ -5,16 +5,16 @@ struct parser {
err: *file;
}
-func setup_parser(a: *alloc, err: *file): *parser {
+func setup_parser(cc: *compiler, err: *file): *parser {
var c: *parser;
- c = alloc(a, sizeof(*c)) as *parser;
+ c = alloc(cc.a, sizeof(*c)) as *parser;
- c.a = a;
+ c.a = cc.a;
- c.p = peg_new("", "", 0, a, peg_P_grammar, P_tag_to_str);
+ c.p = peg_new("", "", 0, cc.a, peg_P_grammar, P_tag_to_str);
- c.pc = setup_peg(a);
+ c.pc = setup_peg(cc);
c.err = err;
diff --git a/peg.om b/peg.om
@@ -3,9 +3,10 @@
struct peg_compiler {
a: *alloc;
p: *peg;
- out: *file;
+ cc: *compiler;
scratch: *byte;
prefix: *byte;
+ prefix_len: int;
}
enum {
@@ -72,36 +73,42 @@ func decode_count(n: *peg_node): int {
}
}
-func translate_literal(c: *peg_compiler, n: *peg_node) {
- var i: int;
- var len: int;
- var ch: int;
- var hex: *byte;
-
- hex = "0123456789abcdef";
-
- fputs(c.out, " ok = literal(c, \"");
- i = 1;
- len = n.len - 1;
- loop {
- if i == len {
- break;
- }
-
- ch = n.str[i] as int;
-
- if ch < 32 || ch > 127 || ch == '\\' || ch == '"' {
- fputc(c.out, '\\');
- fputc(c.out, 'x');
- fputc(c.out, hex[ch >> 4] as int);
- fputc(c.out, hex[ch & 15] as int);
- } else {
- fputc(c.out, ch);
- }
-
- i = i + 1;
- }
- fputs(c.out, "\");\n");
+func translate_literal(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) {
+ var a: *irop;
+ var b: *irop;
+ var o: *irop;
+ var ret: *irop;
+ var t: *type;
+ var next: *irblock;
+ var s: *byte;
+ // Copy n.str minus its first and last byte (presumably the quotes) into a fresh buffer.
+ s = alloc(c.a, n.len - 1); // n.len - 2 payload bytes plus one terminator byte
+ memcpy(s, &n.str[1], n.len - 2);
+ s[n.len - 2] = 0 as byte; // terminator directly after the payload; index n.len - 1 is past the buffer
+
+ t = mktype0(c.cc, TY_INT);
+
+ next = mkirblock(ic);
+
+ // ret = literal(c, ${n.str});
+ ret = mkirtmp(ic, t);
+ o = mkirretval(ic, ret, t);
+ iraddop(ic, o);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirstr(ic, s, n.len - 2);
+ o = mkirarg(ic, 1, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "literal");
+ ircall(ic, a, 2);
+
+ // if ret == 0 { goto fail; }
+ a = ret;
+ b = mkirconst(ic, 0);
+ o = mkirop(ic, IOP_NE, a, b);
+
+ irbranch(ic, o, fail, next);
}
func parse_escape(s: *byte, i: *int, n: int): int {
@@ -153,14 +160,22 @@ func parse_escape(s: *byte, i: *int, n: int): int {
}
}
-func translate_charset(c: *peg_compiler, n: *peg_node) {
+func translate_charset(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) {
var i: int;
+ var j: int;
var len: int;
var ch: int;
var a: int;
var b: int;
var hex: *byte;
var count: int;
+ var oa: *irop;
+ var ob: *irop;
+ var o: *irop;
+ var ret: *irop;
+ var t: *type;
+ var next: *irblock;
+ var s: *byte;
hex = "0123456789abcdef";
@@ -222,84 +237,173 @@ func translate_charset(c: *peg_compiler, n: *peg_node) {
i = i + 1;
}
- if count == 0 {
- fputs(c.out, " fail(c);\n");
- fputs(c.out, " ok = 0;\n");
- return;
- } else if count >= 255 {
- fputs(c.out, " ok = any(c);\n");
- return;
- } else if count == 1 {
- fputs(c.out, " ok = literal(c, \"");
- } else {
- fputs(c.out, " ok = charset(c, \"");
- }
-
i = 1;
+ j = 0;
loop {
if i == 256 {
break;
}
if c.scratch[i] {
- if ch < 32 || ch > 127 || ch == '\\' || ch == '"' {
- fputc(c.out, '\\');
- fputc(c.out, 'x');
- fputc(c.out, hex[i >> 4] as int);
- fputc(c.out, hex[i & 15] as int);
- } else {
- fputc(c.out, i);
- }
+ c.scratch[j] = i as byte;
+ j = j + 1;
}
i = i + 1;
}
- fputs(c.out, "\");\n");
+
+ c.scratch[j] = 0 as byte;
+
+ s = alloc(c.a, j + 1);
+ memcpy(s, c.scratch, j);
+ s[j] = 0 as byte;
+
+ t = mktype0(c.cc, TY_INT);
+
+ next = mkirblock(ic);
+
+ // ret = charset(c, ${charset});
+ ret = mkirtmp(ic, t);
+ o = mkirretval(ic, ret, t);
+ iraddop(ic, o);
+ oa = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, oa);
+ iraddop(ic, o);
+ oa = mkirstr(ic, s, j);
+ o = mkirarg(ic, 1, oa);
+ iraddop(ic, o);
+ oa = mkirfuncref(ic, "charset");
+ ircall(ic, oa, 2);
+
+ // if ret == 0 { goto fail; }
+ oa = ret;
+ ob = mkirconst(ic, 0);
+ o = mkirop(ic, IOP_NE, oa, ob);
+
+ irbranch(ic, o, fail, next);
+}
+
+func translate_call(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) {
+ var prefix: *byte;
+ var prefix_len: int;
+ var result: *byte;
+ var len: int;
+ var a: *irop;
+ var b: *irop;
+ var o: *irop;
+ var ret: *irop;
+ var t: *type;
+ var next: *irblock;
+
+ prefix = c.prefix;
+ prefix_len = strlen(prefix);
+
+ len = prefix_len + n.len + 4;
+
+ result = alloc(c.a, len + 1);
+
+ memcpy(result, "peg_", 4);
+ memcpy(&result[4], prefix, prefix_len);
+ memcpy(&result[4 + prefix_len], n.str, n.len);
+ result[len] = 0 as byte;
+
+ next = mkirblock(ic);
+
+ t = mktype0(c.cc, TY_INT);
+
+ ret = mkirtmp(ic, t);
+
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+
+ o = mkirretval(ic, ret, t);
+ iraddop(ic, o);
+
+ a = mkirfuncref(ic, result);
+ ircall(ic, a, 1);
+
+ a = ret;
+ b = mkirconst(ic, 0);
+ o = mkirop(ic, IOP_NE, a, b);
+ irbranch(ic, o, fail, next);
}
-func translate_pattern(c: *peg_compiler, n: *peg_node) {
+func translate_pattern(c: *peg_compiler, ic: *irfunc, fail: *irblock, n: *peg_node) {
var count: int;
var look: int;
var d: *peg_node;
+ var a: *irop;
+ var b: *irop;
+ var o: *irop;
+ var ret: *irop;
+ var t: *type;
+ var next: *irblock;
+ var out: *irblock;
loop {
- if n.tag == P_peg_pattern {
+ if n.tag == P_peg_pattern { // A sequence of alternatives
d = n.child;
if !d.next {
- translate_pattern(c, d);
+ translate_pattern(c, ic, fail, d);
} else {
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, d);
- d = d.next;
+ next = mkirblock(ic);
+ out = mkirblock(ic);
+
+ irjump(ic, next, next);
+
loop {
if !d {
break;
}
- fputs(c.out, " if !ok { choice(c);\n");
- translate_pattern(c, d);
- fputs(c.out, " }\n");
+ next = mkirblock(ic);
+
+ // next: choice(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "choice");
+ ircall(ic, a, 1);
+ // if !pattern { goto next; }
+ translate_pattern(c, ic, next, d);
+
+ // goto out;
+ irjump(ic, out, next);
d = d.next;
}
- fputs(c.out, " if ok { commit(c); } else { fail(c); }\n");
+
+ // next: fail(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+ // goto fail;
+ irjump(ic, fail, out);
+
+ // out: commit(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "commit");
+ ircall(ic, a, 1);
}
- } else if n.tag == P_peg_alternative {
+ } else if n.tag == P_peg_alternative { // A sequence of concatenations
d = n.child;
- translate_pattern(c, d);
+ translate_pattern(c, ic, fail, d);
d = d.next;
loop {
if !d {
break;
}
- fputs(c.out, " if ok {\n");
- translate_pattern(c, d);
- fputs(c.out, " }\n");
+ translate_pattern(c, ic, fail, d);
d = d.next;
}
- } else if n.tag == P_peg_lookahead {
+ } else if n.tag == P_peg_lookahead { // An and or a not pattern that does not consume
look = decode_look(n);
d = n.child;
if d.tag == P_peg_lookop {
@@ -307,59 +411,158 @@ func translate_pattern(c: *peg_compiler, n: *peg_node) {
}
if look == LOOK_AND {
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, d);
- fputs(c.out, " fail(c);\n");
+ out = mkirblock(ic);
+ next = mkirblock(ic);
+
+ // choice(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "choice");
+ ircall(ic, a, 1);
+
+ // if !pattern { goto next; }
+ translate_pattern(c, ic, next, d);
+
+ // fail(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+
+ // goto out;
+ irjump(ic, out, next);
+
+ // next: fail(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+
+ // goto fail;
+ irjump(ic, fail, out);
+
+ // out:
} else if look == LOOK_NOT {
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, d);
- fputs(c.out, " if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }\n");
+ out = mkirblock(ic);
+
+ // choice(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "choice");
+ ircall(ic, a, 1);
+
+ // if !pattern { goto out; }
+ translate_pattern(c, ic, out, d);
+
+ // fail(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+
+ // fail(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+
+ // goto fail;
+ irjump(ic, fail, out);
+
+ // out:
} else if look == LOOK_NORMAL {
- translate_pattern(c, d);
+ translate_pattern(c, ic, fail, d);
} else {
die("invalid lookop");
}
- } else if n.tag == P_peg_suffix {
+ } else if n.tag == P_peg_suffix { // Repetition operator
count = decode_count(n);
if count == ZERO_OR_ONE {
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, n.child);
- fputs(c.out, " if ok { commit(c); } else { ok = 1; }\n");
+ // choice(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "choice");
+ ircall(ic, a, 1);
+
+ out = mkirblock(ic);
+
+ // if !pattern { goto out; }
+ translate_pattern(c, ic, out, n.child);
+
+ // commit(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "commit");
+ ircall(ic, a, 1);
+
+ // out:
+ irjump(ic, out, out);
} else if count == EXACTLY_ONE {
- translate_pattern(c, n.child);
+ translate_pattern(c, ic, fail, n.child);
} else if count == ZERO_OR_MORE {
- fputs(c.out, " loop {\n");
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, n.child);
- fputs(c.out, " if !ok { ok = 1; break; }\n");
- fputs(c.out, " commit(c);\n");
- fputs(c.out, " }\n");
+zero_or_more: next = mkirblock(ic);
+ out = mkirblock(ic);
+
+ // next:
+ irjump(ic, next, next);
+
+ // choice(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "choice");
+ ircall(ic, a, 1);
+
+ // if !pattern { goto out; }
+ translate_pattern(c, ic, out, n.child);
+
+ // commit(c);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "commit");
+ ircall(ic, a, 1);
+
+ // goto next; out:
+ irjump(ic, next, out);
} else if count == ONE_OR_MORE {
- translate_pattern(c, n.child);
- fputs(c.out, " if ok {\n");
- fputs(c.out, " loop {\n");
- fputs(c.out, " choice(c);\n");
- translate_pattern(c, n.child);
- fputs(c.out, " if !ok { ok = 1; break; }\n");
- fputs(c.out, " commit(c);\n");
- fputs(c.out, " }\n");
- fputs(c.out, " }\n");
+ translate_pattern(c, ic, fail, n.child);
+ goto zero_or_more;
} else {
die("invalid countop");
}
} else if n.tag == P_peg_primary {
- translate_pattern(c, n.child);
+ translate_pattern(c, ic, fail, n.child);
} else if n.tag == P_peg_any {
- fputs(c.out, " ok = any(c);\n");
+ // if !any(c) { goto fail; }
+ next = mkirblock(ic);
+ t = mktype0(c.cc, TY_INT);
+ ret = mkirtmp(ic, t);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ o = mkirretval(ic, ret, t);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "any");
+ ircall(ic, a, 1);
+ a = ret;
+ b = mkirconst(ic, 0);
+ o = mkirop(ic, IOP_NE, a, b);
+ irbranch(ic, o, fail, next);
} else if n.tag == P_peg_literal {
- translate_literal(c, n);
+ translate_literal(c, ic, fail, n);
} else if n.tag == P_peg_class {
- translate_charset(c, n);
+ translate_charset(c, ic, fail, n);
} else if n.tag == P_peg_call {
- fputs(c.out, " ok = peg_");
- fputs(c.out, c.prefix);
- fputb(c.out, n.child.str, n.child.len);
- fputs(c.out, "(c);\n");
+ translate_call(c, ic, fail, n);
} else if n.tag == P_sp {
n = n.next;
continue;
@@ -379,69 +582,223 @@ func setprefix(c: *peg_compiler, s: *byte, n: int) {
p[n] = 0 as byte;
c.prefix = p;
+ c.prefix_len = n;
}
-func translate(c: *peg_compiler, n: *peg_node) {
- var v: *peg_node;
+func translate_rule(c: *peg_compiler, i: int, tag: *byte, v: *peg_node) { // Emit the IR function "peg_" + tag (type func(*peg): int) for rule node v; i is the rule's numeric tag.
+ var d: *decl;
+ var arg1_type: *type;
+ var args_type: *type;
+ var ret_type: *type;
+ var func_type: *type;
+ var ic: *irfunc;
+ var next: *irblock; // not referenced in this function
+ var fail: *irblock;
+ var o: *irop;
+ var a: *irop;
+ var b: *irop; // not referenced in this function
+ var name: *byte;
+ var tag_len: int;
+ var name_len: int;
+
+ tag_len = strlen(tag);
+ name_len = tag_len + 4; // 4 is the length of the "peg_" prefix copied below
+
+ name = alloc(c.a, name_len + 1); // +1 for the terminating NUL
+ memcpy(name, "peg_", 4);
+ memcpy(&name[4], tag, tag_len);
+ name[name_len] = 0 as byte;
+
+ d = find(c.cc, "peg", nil, 1); // declaration named "peg"; NOTE(review): meaning of the nil / 1 arguments is not visible here
+
+ arg1_type = mktype_struct(c.cc, d);
+ arg1_type = mktype1(c.cc, TY_PTR, arg1_type);
+ args_type = mktype1(c.cc, TY_ARG, arg1_type);
+
+ ret_type = mktype0(c.cc, TY_INT);
+
+ // func(*peg): int
+ func_type = mktype2(c.cc, TY_FUNC, ret_type, args_type);
+
+ ic = mkirfunc(c.cc, name);
+
+ iraddarg(ic, "c", arg1_type);
+
+ fail = mkirblock(ic); // handed to translate_pattern as its failure target; its code is emitted after the success return
+
+ // enter(c, i);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirconst(ic, i);
+ o = mkirarg(ic, 1, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "enter");
+ ircall(ic, a, 2);
+
+ // if !pattern { goto fail; }
+ translate_pattern(c, ic, fail, v.child.next); // v.child is the rule-name node (see translate); its next sibling is passed as the pattern
+
+ // leave(c, i);
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirconst(ic, i);
+ o = mkirarg(ic, 1, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "leave");
+ ircall(ic, a, 2);
+
+ // ok: return 1;
+ o = mkirconst(ic, 1);
+ irreturn(ic, o);
+
+ // fail: fail(c);
+ irjump(ic, fail, fail); // presumably makes the fail block the current emission point — confirm against irjump
+ a = mkirvarop(ic, "c");
+ o = mkirarg(ic, 0, a);
+ iraddop(ic, o);
+ a = mkirfuncref(ic, "fail");
+ ircall(ic, a, 1);
+
+ // return 0;
+ o = mkirconst(ic, 0);
+ irreturn(ic, o);
+
+ define_ir_func(c.cc, ic, func_type);
+}
- v = n.child;
+func translate_rules(c: *peg_compiler, tags: **byte, nodes: **peg_node, num_rules: int) { // Call translate_rule once per rule index, pairing tags[i] with nodes[i].
+ var i: int;
+
+ i = 0;
loop {
- if !v {
+ if i == num_rules { // all num_rules entries handled
break;
}
- if v.tag == P_peg_prefix {
- setprefix(c, v.child.str, v.child.len);
- }
+ translate_rule(c, i, tags[i], nodes[i]); // the index i doubles as the rule's numeric tag
- v = v.next;
+ i = i + 1;
}
+}
- // Generate tags for each rule
- fputs(c.out, "enum {\n");
- v = n.child;
+func translate_tag_to_str(c: *peg_compiler, tags: **byte, num_rules: int) { // Emit the IR function <prefix>tag_to_str (type func(int): *byte) that maps tag number i to the string tags[i].
+ var name: *byte;
+ var tag: *byte;
+ var taglen: int;
+ var i: int;
+ var arg1_type: *type;
+ var args_type: *type;
+ var ret_type: *type;
+ var func_type: *type;
+ var ic: *irfunc;
+ var o: *irop;
+ var a: *irop;
+ var b: *irop;
+ var this: *irblock;
+ var next: *irblock;
+
+ name = make_tag(c, "tag_to_str", 10); // 10 is the length of "tag_to_str"
+
+ arg1_type = mktype0(c.cc, TY_INT);
+ args_type = mktype1(c.cc, TY_ARG, arg1_type);
+
+ ret_type = mktype0(c.cc, TY_BYTE);
+ ret_type = mktype1(c.cc, TY_PTR, ret_type);
+
+ // func(int): *byte
+ func_type = mktype2(c.cc, TY_FUNC, ret_type, args_type);
+
+ ic = mkirfunc(c.cc, name);
+
+ iraddarg(ic, "tag", arg1_type);
+
+ i = 0;
loop {
- if !v {
+ if i == num_rules { // one comparison has been emitted per rule
break;
}
- if v.tag == P_peg_rule {
- fputs(c.out, " ");
- fputs(c.out, c.prefix);
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, ",\n");
- }
+ tag = tags[i];
+ taglen = strlen(tag);
- v = v.next;
+ this = mkirblock(ic); // block holding the return for a matching tag
+ next = mkirblock(ic); // block where the following comparison is emitted
+
+ // if tag == i { return tags[i]; }
+ a = mkirop(ic, IOP_VAR, nil, nil);
+ a.n = 0; // presumably selects variable slot 0, the "tag" argument — confirm
+ b = mkirconst(ic, i);
+ o = mkirop(ic, IOP_EQ, a, b);
+ irbranch(ic, o, next, this); // per the comment above: equal continues in this, otherwise control goes to next
+ o = mkirstr(ic, tag, taglen); // NOTE(review): tags[i] carries c.prefix (see make_tag); the removed code returned the bare rule name — confirm intended
+ irreturn(ic, o);
+
+ irjump(ic, next, next); // continue emitting in next
+
+ i = i + 1;
}
- fputs(c.out, "}\n\n");
- // Generate tag to string
- fputs(c.out, "func ");
- fputs(c.out, c.prefix);
- fputs(c.out, "tag_to_str(tag: int): *byte {\n");
+ // return nil when no tag matched; NOTE(review): the removed code emitted die("invalid tag") before this return
+ o = mkirconst(ic, 0);
+ irreturn(ic, o);
+
+ define_ir_func(c.cc, ic, func_type);
+}
+
+func make_tag(c: *peg_compiler, suffix: *byte, suffix_len: int): *byte { // Return a new NUL-terminated string from c.a: c.prefix followed by suffix_len bytes of suffix.
+ var prefix: *byte;
+ var prefix_len: int;
+ var result: *byte;
+ var len: int;
+
+ prefix = c.prefix;
+ prefix_len = strlen(prefix); // measured with strlen here; the c.prefix_len field is not read
+
+ len = prefix_len + suffix_len;
+
+ result = alloc(c.a, len + 1); // +1 for the terminating NUL
+
+ memcpy(result, prefix, prefix_len);
+ memcpy(&result[prefix_len], suffix, suffix_len); // suffix is copied by length, so it need not be NUL-terminated
+ result[len] = 0 as byte;
+
+ return result;
+}
+
+func translate(c: *peg_compiler, n: *peg_node) { // Translate grammar node n: apply the prefix, define one enum tag per rule, then emit tag_to_str and the rule parsers.
+ var v: *peg_node;
+ var i: int;
+ var num_rules: int;
+ var tags: **byte;
+ var nodes: **peg_node;
+ var tag: *byte;
+
+ // First pass: apply any prefix declaration and count the rules
v = n.child;
+ num_rules = 0;
loop {
if !v {
break;
}
+ if v.tag == P_peg_prefix {
+ setprefix(c, v.child.str, v.child.len); // this pass finishes before any tag is built, so the prefix applies to every rule
+ }
+
if v.tag == P_peg_rule {
- fputs(c.out, " if tag == ");
- fputs(c.out, c.prefix);
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, " { return \"");
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, "\"; }\n");
+ num_rules = num_rules + 1;
}
v = v.next;
}
- fputs(c.out, " die(\"invalid tag\");\n");
- fputs(c.out, " return nil;\n");
- fputs(c.out, "}\n");
- // Generate parsing functions for each rule
+ tags = alloc(c.a, num_rules * sizeof(*tags)) as **byte; // one name pointer per rule
+ nodes = alloc(c.a, num_rules * sizeof(*nodes)) as **peg_node; // one rule node per rule, same index as tags
+
+ // Second pass: define an enum tag per rule and record its name and node by index
+ i = 0;
v = n.child;
loop {
if !v {
@@ -449,63 +806,33 @@ func translate(c: *peg_compiler, n: *peg_node) {
}
if v.tag == P_peg_rule {
- fputs(c.out, "\nfunc peg_");
- fputs(c.out, c.prefix);
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, "(c: *peg): int {\n");
- fputs(c.out, " var ok: int;\n");
- fputs(c.out, " enter(c, ");
- fputs(c.out, c.prefix);
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, ");\n");
- translate_pattern(c, v.child.next);
- fputs(c.out, " if ok { leave(c, ");
- fputs(c.out, c.prefix);
- fputb(c.out, v.child.str, v.child.len);
- fputs(c.out, "); } else { fail(c); }\n");
- fputs(c.out, " return ok;\n");
- fputs(c.out, "}\n");
+ tag = make_tag(c, v.child.str, v.child.len); // prefix + rule name
+ define_enum_tag(c.cc, tag, i); // the enum value is the rule's index
+ tags[i] = tag;
+ nodes[i] = v;
+ i = i + 1;
}
v = v.next;
}
+
+ // Generate the tag-to-string function
+ translate_tag_to_str(c, tags, num_rules);
+
+ // Generate a parsing function for each rule
+ translate_rules(c, tags, nodes, num_rules);
+}
-func setup_peg(a: *alloc): *peg_compiler {
+func setup_peg(cc: *compiler): *peg_compiler { // Create a peg_compiler attached to compiler cc, using cc's allocator.
var c: *peg_compiler;
- c = alloc(a, sizeof(*c)) as *peg_compiler;
- c.a = a;
+ c = alloc(cc.a, sizeof(*c)) as *peg_compiler; // NOTE(review): c.prefix_len is not assigned here, only in setprefix — confirm alloc zero-fills
+ c.a = cc.a; // share the host compiler's allocator
c.prefix = "";
- c.scratch = alloc(c.a, 256);
+ c.scratch = alloc(cc.a, 256);
+ c.cc = cc; // the translate_* functions pass c.cc to the type, IR and declaration helpers
return c;
}
-func peg_open_output(c: *peg_compiler, filename: *byte) {
- var fd: int;
- var f: *file;
-
- unlink(filename);
-
- fd = open(filename, O_CREAT | O_WRONLY, (6 << 6) + (6 << 3) + 6);
- if fd < 0 {
- die("peg_open_output: failed to open output");
- }
-
- f = fopen(fd, c.a);
- c.out = f;
-}
-
func peg_compile(c: *peg_compiler, pn: *peg_node, err: *file) {
- var node: *peg_node;
-
- if !c.out {
- return;
- }
-
- c.p = peg_new(pn.filename, pn.str, pn.len, c.a, peg_P_peg_grammar, P_tag_to_str);
-
- node = peg_parse(c.p, P_sp, err);
- translate(c, node);
-
- fflush(c.out);
+ translate(c, pn); // pn is now translated directly instead of being re-parsed from its text; err is no longer used in this body
}