commit f9c585acac4425598b1b0b0e739d1a33c22537c3
parent 1dde52de096e9a61c09328e7965a7638f8bba18d
Author: erai <erai@omiltem.net>
Date: Sun, 6 Apr 2025 01:19:15 +0000
compile lexer to nfa
Diffstat:
M | bootstrap.sh | | | 2 | +- |
M | cc0.c | | | 1029 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- |
M | cc3.om | | | 21 | +++++++++++---------- |
M | lexer.om | | | 414 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
M | lib.om | | | 8 | ++++++++ |
5 files changed, 1367 insertions(+), 107 deletions(-)
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -9,6 +9,6 @@ SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om i
./cc0 ${LIBS} ${SOURCES} cc3.om -o cc1 -n cc1.lines -G cc1.call
# Double check the bootstrap and self hosting compiler have the same output
-./cc1 ${LIBS} ${SOURCES} cc3.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call
+./cc1 ${LIBS} ${SOURCES} cc3.om cc4.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call
cmp cc1 cc2 || echo cc mismatch
cmp cc0.c cc2.c || echo bootstrap mismatch
diff --git a/cc0.c b/cc0.c
@@ -130,9 +130,19 @@ u zirreset();
u zirreturn();
u ziruseop();
u zlabels_to_ir();
+u zlalr_compiler();
u zlayout_struct();
u zlayout_union();
u zleave();
+u zlexer_compile();
+u zlexer_compile_alternative();
+u zlexer_compile_any();
+u zlexer_compile_charset();
+u zlexer_compile_pattern();
+u zlexer_compile_primary();
+u zlexer_compile_rule();
+u zlexer_compile_str();
+u zlexer_compile_suffix();
u zliteral();
u zlocals_to_ir();
u zmain();
@@ -167,6 +177,14 @@ u zmktype_struct();
u zmktype_union();
u zmmap();
u znext_decl();
+u znfa_alt();
+u znfa_any();
+u znfa_concat();
+u znfa_empty();
+u znfa_lit();
+u znfa_plus();
+u znfa_qmark();
+u znfa_star();
u znode_to_str();
u zopen();
u zopen_call_out();
@@ -251,6 +269,7 @@ u zpeg_P_lexer_op();
u zpeg_P_lexer_pattern();
u zpeg_P_lexer_primary();
u zpeg_P_lexer_rule();
+u zpeg_P_lexer_str();
u zpeg_P_lexer_suffix();
u zpeg_P_loop();
u zpeg_P_loop_stmt();
@@ -609,38 +628,40 @@ b210: if (vtag != 105UL) goto b212;
b212: if (vtag != 106UL) goto b214;
return (u)"P_peg_identifier";
b214: if (vtag != 107UL) goto b216;
- return (u)"P_lalr_primary";
+ return (u)"P_lexer_dot";
b216: if (vtag != 108UL) goto b218;
- return (u)"P_lalr_op";
+ return (u)"P_lexer_op";
b218: if (vtag != 109UL) goto b220;
- return (u)"P_lalr_suffix";
+ return (u)"P_lexer_str";
b220: if (vtag != 110UL) goto b222;
- return (u)"P_lalr_alternative";
+ return (u)"P_lexer_charset";
b222: if (vtag != 111UL) goto b224;
- return (u)"P_lalr_pattern";
+ return (u)"P_lexer_primary";
b224: if (vtag != 112UL) goto b226;
- return (u)"P_lalr_rule";
+ return (u)"P_lexer_suffix";
b226: if (vtag != 113UL) goto b228;
- return (u)"P_lalr_grammar";
+ return (u)"P_lexer_alternative";
b228: if (vtag != 114UL) goto b230;
- return (u)"P_lexer_dot";
+ return (u)"P_lexer_pattern";
b230: if (vtag != 115UL) goto b232;
- return (u)"P_lexer_op";
+ return (u)"P_lexer_rule";
b232: if (vtag != 116UL) goto b234;
- return (u)"P_lexer_charset";
+ return (u)"P_lexer_grammar";
b234: if (vtag != 117UL) goto b236;
- return (u)"P_lexer_primary";
+ return (u)"P_lalr_primary";
b236: if (vtag != 118UL) goto b238;
- return (u)"P_lexer_suffix";
+ return (u)"P_lalr_op";
b238: if (vtag != 119UL) goto b240;
- return (u)"P_lexer_alternative";
+ return (u)"P_lalr_suffix";
b240: if (vtag != 120UL) goto b242;
- return (u)"P_lexer_pattern";
+ return (u)"P_lalr_alternative";
b242: if (vtag != 121UL) goto b244;
- return (u)"P_lexer_rule";
+ return (u)"P_lalr_pattern";
b244: if (vtag != 122UL) goto b246;
- return (u)"P_lexer_grammar";
-b246: return 0UL;
+ return (u)"P_lalr_rule";
+b246: if (vtag != 123UL) goto b248;
+ return (u)"P_lalr_grammar";
+b248: return 0UL;
}
u z_start(u vargc, u vargv, u venvp) {
u v3 = 0;
@@ -22476,6 +22497,9 @@ b34: goto b7;
b5: v4 = 1UL;
goto b6;
}
+u zlalr_compiler(u vc, u vpn, u verr) {
+ return 0UL;
+}
u zlayout_struct(u vc, u vd) {
u vm = 0;
u voffset = 0;
@@ -22721,6 +22745,504 @@ b7: *(u*)(vc + 168UL) = *(u*)(vc + 168UL) * 2UL;
goto b5;
b4: goto b2;
}
+u zlexer_compile(u vc, u vpn, u verr) {
+ u va = 0;
+ u vb = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ vpn = *(u*)(vpn + 16UL);
+b1: if (!vpn) goto b7;
+ v5 = 0UL;
+b8: if (!v5) goto b5;
+ return 0UL;
+b5: v6 = (u)zlexer_compile_rule;
+ v7 = vc;
+ v8 = vpn;
+ v9 = ((u(*)())v6)(v7, v8);
+ vb = v9;
+ v10 = (u)znfa_alt;
+ v11 = vc;
+ v12 = va;
+ v13 = vb;
+ v14 = ((u(*)())v10)(v11, v12, v13);
+ va = v14;
+ vpn = *(u*)(vpn + 8UL);
+ goto b1;
+b7: v5 = 1UL;
+ goto b8;
+}
+u zlexer_compile_alternative(u vc, u vpn) {
+ u vpart = 0;
+ u va = 0;
+ u vb = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ vpart = *(u*)(vpn + 16UL);
+ v5 = (u)znfa_empty;
+ v6 = vc;
+ v7 = ((u(*)())v5)(v6);
+ va = v7;
+b2: if (!vpart) goto b8;
+ v8 = 0UL;
+b9: if (!v8) goto b6;
+ return va;
+b6: v9 = (u)zlexer_compile_suffix;
+ v10 = vc;
+ v11 = vpart;
+ v12 = ((u(*)())v9)(v10, v11);
+ vb = v12;
+ v13 = (u)znfa_concat;
+ v14 = vc;
+ v15 = va;
+ v16 = vb;
+ v17 = ((u(*)())v13)(v14, v15, v16);
+ va = v17;
+ vpart = *(u*)(vpart + 8UL);
+ goto b2;
+b8: v8 = 1UL;
+ goto b9;
+}
+u zlexer_compile_any(u vc, u vpn) {
+ u v2 = 0;
+ u v3 = 0;
+ u v4 = 0;
+ v2 = (u)znfa_any;
+ v3 = vc;
+ v4 = ((u(*)())v2)(v3);
+ return v4;
+}
+u zlexer_compile_charset(u vc, u vpn) {
+ u vi = 0;
+ u vj = 0;
+ u vlen = 0;
+ u vch = 0;
+ u vx = 0;
+ u vok = 0;
+ u vs = 0;
+ u vscratch = 0;
+ u vstart = 0;
+ u vend = 0;
+ u va = 0;
+ u vb = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ u v30 = 0;
+ u v31 = 0;
+ u v32 = 0;
+ u v33 = 0;
+ u v34 = 0;
+ u v35 = 0;
+ u v36 = 0;
+ u v37 = 0;
+ u v38 = 0;
+ u v39 = 0;
+ u v40 = 0;
+ u v41 = 0;
+ u v42 = 0;
+ u v43 = 0;
+ u v44 = 0;
+ u v45 = 0;
+ u v46 = 0;
+ u v47 = 0;
+ u v48 = 0;
+ u v49 = 0;
+ u v50 = 0;
+ u v51 = 0;
+ v14 = (u)zalloc;
+ v15 = *(u*)(vc + 0UL);
+ v16 = 256UL;
+ v17 = ((u(*)())v14)(v15, v16);
+ vscratch = v17;
+ vs = *(u*)(vpn + 24UL);
+ vi = 2UL;
+ vlen = *(u*)(vpn + 32UL) - 2UL;
+ vx = 1UL;
+b2: if (vi != vlen) goto b6;
+ vj = 0UL;
+b40: if (vj != 256UL) goto b44;
+ v48 = (u)zfree;
+ v49 = *(u*)(vc + 0UL);
+ v50 = vscratch;
+ v51 = ((u(*)())v48)(v49, v50);
+ v51;
+ return va;
+b44: if (!(u)*(b*)(vscratch + vj * 1UL)) goto b47;
+ v39 = (u)znfa_lit;
+ v40 = vc;
+ v41 = vj;
+ v42 = ((u(*)())v39)(v40, v41);
+ vb = v42;
+ v43 = (u)znfa_alt;
+ v44 = vc;
+ v45 = va;
+ v46 = vb;
+ v47 = ((u(*)())v43)(v44, v45, v46);
+ va = v47;
+b45: vj = vj + 1UL;
+ goto b40;
+b47: goto b45;
+b6: vch = (u)*(b*)(vs + vi * 1UL);
+ if (vch != 92UL) goto b9;
+ v18 = (u)zunescape;
+ v19 = vs;
+ v20 = (u)&vi;
+ v21 = vlen;
+ v22 = (u)&vok;
+ v23 = ((u(*)())v18)(v19, v20, v21, v22);
+ vch = v23;
+ if (!vok) goto b15;
+ v24 = 0UL;
+b16: if (!v24) goto b13;
+ v25 = (u)zdie;
+ v26 = (u)"lexer_compile_charset: invalid escape";
+ v27 = ((u(*)())v25)(v26);
+ v27;
+b11:b7: vstart = vch;
+ vend = vch;
+ if (vi == vlen) goto b24;
+ if ((u)*(b*)(vs + vi * 1UL) != 45UL) goto b24;
+ v28 = 1UL;
+b26: if (!v28) goto b22;
+ vi = vi + 1UL;
+ v29 = (u)zunescape;
+ v30 = vs;
+ v31 = (u)&vi;
+ v32 = vlen;
+ v33 = (u)&vok;
+ v34 = ((u(*)())v29)(v30, v31, v32, v33);
+ vch = v34;
+ if (!vok) goto b32;
+ v35 = 0UL;
+b33: if (!v35) goto b30;
+ v36 = (u)zdie;
+ v37 = (u)"lexer_compile_charset: invalid escape";
+ v38 = ((u(*)())v36)(v37);
+ v38;
+b28: vend = vch;
+b20: vj = vstart;
+b35: if ((s)vj <= (s)vend) goto b39;
+ goto b2;
+b39: *(b*)(vscratch + vj * 1UL) = vx;
+ vj = vj + 1UL;
+ goto b35;
+b30: goto b28;
+b32: v35 = 1UL;
+ goto b33;
+b22: goto b20;
+b24: v28 = 0UL;
+ goto b26;
+b13: goto b11;
+b15: v24 = 1UL;
+ goto b16;
+b9: if (vch != 94UL) goto b19;
+ vx = 0UL;
+ vi = vi + 1UL;
+ goto b2;
+b19: vi = vi + 1UL;
+ goto b7;
+}
+u zlexer_compile_pattern(u vc, u vpn) {
+ u valt = 0;
+ u va = 0;
+ u vb = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ valt = *(u*)(vpn + 16UL);
+ va = 0UL;
+b1: if (!valt) goto b7;
+ v5 = 0UL;
+b8: if (!v5) goto b5;
+ return va;
+b5: v6 = (u)zlexer_compile_alternative;
+ v7 = vc;
+ v8 = valt;
+ v9 = ((u(*)())v6)(v7, v8);
+ vb = v9;
+ v10 = (u)znfa_alt;
+ v11 = vc;
+ v12 = va;
+ v13 = vb;
+ v14 = ((u(*)())v10)(v11, v12, v13);
+ va = v14;
+ valt = *(u*)(valt + 8UL);
+ goto b1;
+b7: v5 = 1UL;
+ goto b8;
+}
+u zlexer_compile_primary(u vc, u vpn) {
+ u vtag = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ vpn = *(u*)(vpn + 16UL);
+ vtag = *(u*)(vpn + 0UL);
+ if (vtag != 114UL) goto b3;
+ v3 = (u)zlexer_compile_pattern;
+ v4 = vc;
+ v5 = vpn;
+ v6 = ((u(*)())v3)(v4, v5);
+ return v6;
+b3: if (vtag != 107UL) goto b6;
+ v7 = (u)zlexer_compile_any;
+ v8 = vc;
+ v9 = vpn;
+ v10 = ((u(*)())v7)(v8, v9);
+ return v10;
+b6: if (vtag != 109UL) goto b9;
+ v11 = (u)zlexer_compile_str;
+ v12 = vc;
+ v13 = vpn;
+ v14 = ((u(*)())v11)(v12, v13);
+ return v14;
+b9: if (vtag != 110UL) goto b12;
+ v15 = (u)zlexer_compile_charset;
+ v16 = vc;
+ v17 = vpn;
+ v18 = ((u(*)())v15)(v16, v17);
+ return v18;
+b12: v19 = (u)zdie;
+ v20 = (u)"invalid lexer_primary";
+ v21 = ((u(*)())v19)(v20);
+ v21;
+ return 0UL;
+}
+u zlexer_compile_rule(u vc, u vpn) {
+ u vident = 0;
+ u vpat = 0;
+ u va = 0;
+ u vb = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ vident = *(u*)(vpn + 16UL);
+ vpat = *(u*)(vident + 8UL);
+ v6 = (u)zlexer_compile_pattern;
+ v7 = vc;
+ v8 = vpat;
+ v9 = ((u(*)())v6)(v7, v8);
+ va = v9;
+ v10 = (u)znfa_empty;
+ v11 = vc;
+ v12 = ((u(*)())v10)(v11);
+ vb = v12;
+ *(u*)(vb + 0UL) = 1UL;
+ v13 = (u)znfa_concat;
+ v14 = vc;
+ v15 = va;
+ v16 = vb;
+ v17 = ((u(*)())v13)(v14, v15, v16);
+ return v17;
+}
+u zlexer_compile_str(u vc, u vpn) {
+ u vi = 0;
+ u vlen = 0;
+ u vok = 0;
+ u vch = 0;
+ u va = 0;
+ u vb = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ vi = 1UL;
+ vlen = *(u*)(vpn + 32UL) - 1UL;
+ v8 = (u)znfa_empty;
+ v9 = vc;
+ v10 = ((u(*)())v8)(v9);
+ va = v10;
+b2: if (vi != vlen) goto b6;
+ return va;
+b6: v11 = (u)zunescape;
+ v12 = *(u*)(vpn + 24UL);
+ v13 = (u)&vi;
+ v14 = vlen;
+ v15 = (u)&vok;
+ v16 = ((u(*)())v11)(v12, v13, v14, v15);
+ vch = v16;
+ if (!vok) goto b12;
+ v17 = 0UL;
+b13: if (!v17) goto b10;
+ v18 = (u)zdie;
+ v19 = (u)"lexer_compile_str: invalid escape";
+ v20 = ((u(*)())v18)(v19);
+ v20;
+b8: v21 = (u)znfa_lit;
+ v22 = vc;
+ v23 = vch;
+ v24 = ((u(*)())v21)(v22, v23);
+ vb = v24;
+ v25 = (u)znfa_concat;
+ v26 = vc;
+ v27 = va;
+ v28 = vb;
+ v29 = ((u(*)())v25)(v26, v27, v28);
+ va = v29;
+ goto b2;
+b10: goto b8;
+b12: v17 = 1UL;
+ goto b13;
+}
+u zlexer_compile_suffix(u vc, u vpn) {
+ u vprimary = 0;
+ u vop = 0;
+ u vn = 0;
+ u vch = 0;
+ u vzero = 0;
+ u vmore = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ vprimary = *(u*)(vpn + 16UL);
+ v8 = (u)zlexer_compile_primary;
+ v9 = vc;
+ v10 = vprimary;
+ v11 = ((u(*)())v8)(v9, v10);
+ vn = v11;
+ vzero = 0UL;
+ vmore = 0UL;
+ vop = *(u*)(vprimary + 8UL);
+b2: if (!vop) goto b8;
+ v12 = 0UL;
+b9: if (!v12) goto b6;
+ if (!vzero) goto b21;
+ if (!vmore) goto b21;
+ v13 = 1UL;
+b23: if (!v13) goto b19;
+ v14 = (u)znfa_star;
+ v15 = vc;
+ v16 = vn;
+ v17 = ((u(*)())v14)(v15, v16);
+ return v17;
+b19: if (!vzero) goto b26;
+ v18 = (u)znfa_qmark;
+ v19 = vc;
+ v20 = vn;
+ v21 = ((u(*)())v18)(v19, v20);
+ return v21;
+b26: if (!vmore) goto b29;
+ v22 = (u)znfa_plus;
+ v23 = vc;
+ v24 = vn;
+ v25 = ((u(*)())v22)(v23, v24);
+ return v25;
+b29: return vn;
+b21: v13 = 0UL;
+ goto b23;
+b6: vch = (u)*(b*)(*(u*)(vop + 24UL) + 0UL * 1UL);
+ if (vch != 42UL) goto b12;
+ vzero = 1UL;
+ vmore = 1UL;
+b10: vop = *(u*)(vop + 8UL);
+ goto b2;
+b12: if (vch != 43UL) goto b14;
+ vmore = 1UL;
+ goto b10;
+b14: if (vch != 63UL) goto b16;
+ vzero = 1UL;
+ goto b10;
+b16: goto b10;
+b8: v12 = 1UL;
+ goto b9;
+}
u zliteral(u vc, u vs) {
u vi = 0;
u vch = 0;
@@ -23968,6 +24490,296 @@ u znext_decl(u vc, u vd) {
v4 = ((u(*)())v2)(v3);
return v4;
}
+u znfa_alt(u vc, u va, u vb) {
+ u vret = 0;
+ u vn = 0;
+ u vtail = 0;
+ u vi = 0;
+ u vj = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ if (!va) goto b5;
+ v8 = 0UL;
+b6: if (!v8) goto b3;
+ return vb;
+b3: if (!vb) goto b11;
+ v9 = 0UL;
+b12: if (!v9) goto b9;
+ return va;
+b9: v10 = (u)zalloc;
+ v11 = *(u*)(vc + 0UL);
+ v12 = 48UL;
+ v13 = ((u(*)())v10)(v11, v12);
+ vn = v13;
+ *(u*)(vn + 8UL) = -1UL;
+ *(u*)(vn + 16UL) = va;
+ *(u*)(vn + 24UL) = vb;
+ *(u*)(vn + 40UL) = *(u*)(va + 40UL) + *(u*)(vb + 40UL);
+ v14 = (u)zalloc;
+ v15 = *(u*)(vc + 0UL);
+ v16 = 8UL * *(u*)(vn + 40UL);
+ v17 = ((u(*)())v14)(v15, v16);
+ *(u*)(vn + 32UL) = v17;
+ vi = 0UL;
+ vj = 0UL;
+b15: if (vi != *(u*)(va + 40UL)) goto b19;
+b20: if (vi != *(u*)(vb + 40UL)) goto b24;
+ v18 = (u)zfree;
+ v19 = *(u*)(vc + 0UL);
+ v20 = *(u*)(va + 32UL);
+ v21 = ((u(*)())v18)(v19, v20);
+ v21;
+ v22 = (u)zfree;
+ v23 = *(u*)(vc + 0UL);
+ v24 = *(u*)(vb + 32UL);
+ v25 = ((u(*)())v22)(v23, v24);
+ v25;
+ return vret;
+b24: *(u*)(*(u*)(vn + 32UL) + vj * 8UL) = *(u*)(*(u*)(vb + 32UL) + vi * 8UL);
+ vj = vj + 1UL;
+ vi = vi + 1UL;
+ goto b20;
+b19: *(u*)(*(u*)(vn + 32UL) + vj * 8UL) = *(u*)(*(u*)(va + 32UL) + vi * 8UL);
+ vj = vj + 1UL;
+ vi = vi + 1UL;
+ goto b15;
+b11: v9 = 1UL;
+ goto b12;
+b5: v8 = 1UL;
+ goto b6;
+}
+u znfa_any(u vc) {
+ u v1 = 0;
+ u v2 = 0;
+ u v3 = 0;
+ u v4 = 0;
+ v1 = (u)znfa_lit;
+ v2 = vc;
+ v3 = -1UL;
+ v4 = ((u(*)())v1)(v2, v3);
+ return v4;
+}
+u znfa_concat(u vc, u va, u vb) {
+ u vi = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ if (!va) goto b5;
+ v4 = 0UL;
+b6: if (!v4) goto b3;
+ return 0UL;
+b3: if (!vb) goto b11;
+ v5 = 0UL;
+b12: if (!v5) goto b9;
+ return 0UL;
+b9: vi = 0UL;
+b13: if (vi != *(u*)(va + 40UL)) goto b17;
+ v6 = (u)zfree;
+ v7 = *(u*)(vc + 0UL);
+ v8 = *(u*)(va + 32UL);
+ v9 = ((u(*)())v6)(v7, v8);
+ v9;
+ *(u*)(va + 32UL) = *(u*)(vb + 32UL);
+ *(u*)(va + 40UL) = *(u*)(vb + 40UL);
+ return va;
+b17: *(u*)(*(u*)(*(u*)(va + 32UL) + vi * 8UL) + 16UL) = vb;
+ vi = vi + 1UL;
+ goto b13;
+b11: v5 = 1UL;
+ goto b12;
+b5: v4 = 1UL;
+ goto b6;
+}
+u znfa_empty(u vc) {
+ u vn = 0;
+ u v2 = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ v2 = (u)zalloc;
+ v3 = *(u*)(vc + 0UL);
+ v4 = 48UL;
+ v5 = ((u(*)())v2)(v3, v4);
+ vn = v5;
+ *(u*)(vn + 8UL) = -1UL;
+ *(u*)(vn + 16UL) = 0UL;
+ *(u*)(vn + 24UL) = 0UL;
+ v6 = (u)zalloc;
+ v7 = *(u*)(vc + 0UL);
+ v8 = 8UL;
+ v9 = ((u(*)())v6)(v7, v8);
+ *(u*)(vn + 32UL) = v9;
+ *(u*)(vn + 40UL) = 1UL;
+ *(u*)(*(u*)(vn + 32UL) + 0UL * 8UL) = vn;
+ return vn;
+}
+u znfa_lit(u vc, u vch) {
+ u vn = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ v3 = (u)zalloc;
+ v4 = *(u*)(vc + 0UL);
+ v5 = 48UL;
+ v6 = ((u(*)())v3)(v4, v5);
+ vn = v6;
+ *(u*)(vn + 16UL) = 0UL;
+ *(u*)(vn + 8UL) = vch;
+ v7 = (u)zalloc;
+ v8 = *(u*)(vc + 0UL);
+ v9 = 8UL;
+ v10 = ((u(*)())v7)(v8, v9);
+ *(u*)(vn + 32UL) = v10;
+ *(u*)(vn + 40UL) = 1UL;
+ *(u*)(*(u*)(vn + 32UL) + 0UL * 8UL) = vn;
+ return vn;
+}
+u znfa_plus(u vc, u va) {
+ u vb = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ if (!va) goto b5;
+ v3 = 0UL;
+b6: if (!v3) goto b3;
+ return 0UL;
+b3: v4 = (u)zalloc;
+ v5 = *(u*)(vc + 0UL);
+ v6 = 48UL;
+ v7 = ((u(*)())v4)(v5, v6);
+ vb = v7;
+ *(u*)(vb + 8UL) = -1UL;
+ *(u*)(vb + 16UL) = 0UL;
+ *(u*)(vb + 24UL) = va;
+ v8 = (u)zalloc;
+ v9 = *(u*)(vc + 0UL);
+ v10 = 8UL;
+ v11 = ((u(*)())v8)(v9, v10);
+ *(u*)(vb + 32UL) = v11;
+ *(u*)(vb + 40UL) = 1UL;
+ *(u*)(*(u*)(vb + 32UL) + 0UL * 8UL) = vb;
+ v12 = (u)znfa_concat;
+ v13 = vc;
+ v14 = va;
+ v15 = vb;
+ v16 = ((u(*)())v12)(v13, v14, v15);
+ return v16;
+b5: v3 = 1UL;
+ goto b6;
+}
+u znfa_qmark(u vc, u va) {
+ u vb = 0;
+ u vi = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ if (!va) goto b5;
+ v4 = 0UL;
+b6: if (!v4) goto b3;
+ v5 = (u)znfa_empty;
+ v6 = vc;
+ v7 = ((u(*)())v5)(v6);
+ return v7;
+b3: v8 = (u)zalloc;
+ v9 = *(u*)(vc + 0UL);
+ v10 = 48UL;
+ v11 = ((u(*)())v8)(v9, v10);
+ vb = v11;
+ *(u*)(vb + 8UL) = -1UL;
+ *(u*)(vb + 16UL) = 0UL;
+ *(u*)(vb + 24UL) = va;
+ *(u*)(vb + 40UL) = *(u*)(va + 40UL) + 1UL;
+ v12 = (u)zalloc;
+ v13 = *(u*)(vc + 0UL);
+ v14 = 8UL * *(u*)(vb + 40UL);
+ v15 = ((u(*)())v12)(v13, v14);
+ *(u*)(vb + 32UL) = v15;
+b10: if (vi != *(u*)(va + 40UL)) goto b14;
+ *(u*)(*(u*)(vb + 32UL) + vi * 8UL) = vb;
+ v16 = (u)zfree;
+ v17 = *(u*)(vc + 0UL);
+ v18 = *(u*)(va + 32UL);
+ v19 = ((u(*)())v16)(v17, v18);
+ v19;
+ return vb;
+b14: *(u*)(*(u*)(vb + 32UL) + vi * 8UL) = *(u*)(*(u*)(va + 32UL) + vi * 8UL);
+ vi = vi + 1UL;
+ goto b10;
+b5: v4 = 1UL;
+ goto b6;
+}
+u znfa_star(u vc, u va) {
+ u v2 = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ v2 = (u)znfa_plus;
+ v3 = vc;
+ v4 = va;
+ v5 = ((u(*)())v2)(v3, v4);
+ va = v5;
+ v6 = (u)znfa_qmark;
+ v7 = vc;
+ v8 = va;
+ v9 = ((u(*)())v6)(v7, v8);
+ return v9;
+}
u znode_to_str(u vkind) {
if (vkind != 0UL) goto b3;
return (u)"N_IDENT";
@@ -28314,11 +29126,11 @@ b22: zchoice(vc);
if (v6 == 0UL) goto b26;
goto b9;
b26: zchoice(vc);
- v7 = zpeg_P_lalr_grammar(vc);
+ v7 = zpeg_P_lexer_grammar(vc);
if (v7 == 0UL) goto b30;
goto b9;
b30: zchoice(vc);
- v8 = zpeg_P_lexer_grammar(vc);
+ v8 = zpeg_P_lalr_grammar(vc);
if (v8 == 0UL) goto b34;
goto b9;
b34: zfail(vc);
@@ -28523,13 +29335,13 @@ b5: zleave(vc, 90UL);
}
u zpeg_P_lalr_alternative(u vc) {
u v1 = 0;
- zenter(vc, 110UL);
+ zenter(vc, 120UL);
b3: zchoice(vc);
v1 = zpeg_P_lalr_suffix(vc);
if (v1 == 0UL) goto b4;
zcommit(vc);
goto b3;
-b4: zleave(vc, 110UL);
+b4: zleave(vc, 120UL);
return 1UL;
}
u zpeg_P_lalr_grammar(u vc) {
@@ -28541,7 +29353,7 @@ u zpeg_P_lalr_grammar(u vc) {
u v6 = 0;
u v7 = 0;
u v8 = 0;
- zenter(vc, 113UL);
+ zenter(vc, 123UL);
v1 = zliteral(vc, (u)"lalr");
if (v1 == 0UL) goto b1;
v2 = zpeg_P_sp(vc);
@@ -28561,7 +29373,7 @@ b14: v7 = zliteral(vc, (u)"}");
if (v7 == 0UL) goto b1;
v8 = zpeg_P_sp(vc);
if (v8 == 0UL) goto b1;
- zleave(vc, 113UL);
+ zleave(vc, 123UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28570,12 +29382,12 @@ u zpeg_P_lalr_op(u vc) {
u v1 = 0;
u v2 = 0;
u v3 = 0;
- zenter(vc, 108UL);
+ zenter(vc, 118UL);
zchoice(vc);
v1 = zliteral(vc, (u)"*");
if (v1 == 0UL) goto b5;
b4: zcommit(vc);
- zleave(vc, 108UL);
+ zleave(vc, 118UL);
return 1UL;
b5: zchoice(vc);
v2 = zliteral(vc, (u)"+");
@@ -28594,7 +29406,7 @@ u zpeg_P_lalr_pattern(u vc) {
u v2 = 0;
u v3 = 0;
u v4 = 0;
- zenter(vc, 111UL);
+ zenter(vc, 121UL);
v1 = zpeg_P_lalr_alternative(vc);
if (v1 == 0UL) goto b1;
b5: zchoice(vc);
@@ -28606,7 +29418,7 @@ b5: zchoice(vc);
if (v4 == 0UL) goto b6;
zcommit(vc);
goto b5;
-b6: zleave(vc, 111UL);
+b6: zleave(vc, 121UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28619,7 +29431,7 @@ u zpeg_P_lalr_primary(u vc) {
u v5 = 0;
u v6 = 0;
u v7 = 0;
- zenter(vc, 107UL);
+ zenter(vc, 117UL);
zchoice(vc);
v1 = zliteral(vc, (u)"(");
if (v1 == 0UL) goto b5;
@@ -28632,7 +29444,7 @@ u zpeg_P_lalr_primary(u vc) {
v5 = zpeg_P_sp(vc);
if (v5 == 0UL) goto b5;
b4: zcommit(vc);
- zleave(vc, 107UL);
+ zleave(vc, 117UL);
return 1UL;
b5: zchoice(vc);
v6 = zpeg_P_ident(vc);
@@ -28652,7 +29464,7 @@ u zpeg_P_lalr_rule(u vc) {
u v5 = 0;
u v6 = 0;
u v7 = 0;
- zenter(vc, 112UL);
+ zenter(vc, 122UL);
v1 = zpeg_P_ident(vc);
if (v1 == 0UL) goto b1;
v2 = zpeg_P_sp(vc);
@@ -28667,7 +29479,7 @@ u zpeg_P_lalr_rule(u vc) {
if (v6 == 0UL) goto b1;
v7 = zpeg_P_sp(vc);
if (v7 == 0UL) goto b1;
- zleave(vc, 112UL);
+ zleave(vc, 122UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28676,7 +29488,7 @@ u zpeg_P_lalr_suffix(u vc) {
u v1 = 0;
u v2 = 0;
u v3 = 0;
- zenter(vc, 109UL);
+ zenter(vc, 119UL);
v1 = zpeg_P_lalr_primary(vc);
if (v1 == 0UL) goto b1;
b5: zchoice(vc);
@@ -28686,7 +29498,7 @@ b5: zchoice(vc);
if (v3 == 0UL) goto b6;
zcommit(vc);
goto b5;
-b6: zleave(vc, 109UL);
+b6: zleave(vc, 119UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28719,13 +29531,13 @@ b5: zleave(vc, 89UL);
}
u zpeg_P_lexer_alternative(u vc) {
u v1 = 0;
- zenter(vc, 119UL);
+ zenter(vc, 113UL);
b3: zchoice(vc);
v1 = zpeg_P_lexer_suffix(vc);
if (v1 == 0UL) goto b4;
zcommit(vc);
goto b3;
-b4: zleave(vc, 119UL);
+b4: zleave(vc, 113UL);
return 1UL;
}
u zpeg_P_lexer_charset(u vc) {
@@ -28736,7 +29548,7 @@ u zpeg_P_lexer_charset(u vc) {
u v5 = 0;
u v6 = 0;
u v7 = 0;
- zenter(vc, 116UL);
+ zenter(vc, 110UL);
v1 = zliteral(vc, (u)"[[");
if (v1 == 0UL) goto b1;
b5: zchoice(vc);
@@ -28757,7 +29569,7 @@ b9: zcommit(vc);
b26: zfail(vc);
v7 = zliteral(vc, (u)"]]");
if (v7 == 0UL) goto b1;
- zleave(vc, 116UL);
+ zleave(vc, 110UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28773,10 +29585,10 @@ b18: v4 = zany(vc);
}
u zpeg_P_lexer_dot(u vc) {
u v1 = 0;
- zenter(vc, 114UL);
+ zenter(vc, 107UL);
v1 = zliteral(vc, (u)".");
if (v1 == 0UL) goto b1;
- zleave(vc, 114UL);
+ zleave(vc, 107UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28790,7 +29602,7 @@ u zpeg_P_lexer_grammar(u vc) {
u v6 = 0;
u v7 = 0;
u v8 = 0;
- zenter(vc, 122UL);
+ zenter(vc, 116UL);
v1 = zliteral(vc, (u)"lexer");
if (v1 == 0UL) goto b1;
v2 = zpeg_P_sp(vc);
@@ -28810,7 +29622,7 @@ b14: v7 = zliteral(vc, (u)"}");
if (v7 == 0UL) goto b1;
v8 = zpeg_P_sp(vc);
if (v8 == 0UL) goto b1;
- zleave(vc, 122UL);
+ zleave(vc, 116UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28819,12 +29631,12 @@ u zpeg_P_lexer_op(u vc) {
u v1 = 0;
u v2 = 0;
u v3 = 0;
- zenter(vc, 115UL);
+ zenter(vc, 108UL);
zchoice(vc);
v1 = zliteral(vc, (u)"*");
if (v1 == 0UL) goto b5;
b4: zcommit(vc);
- zleave(vc, 115UL);
+ zleave(vc, 108UL);
return 1UL;
b5: zchoice(vc);
v2 = zliteral(vc, (u)"+");
@@ -28843,7 +29655,7 @@ u zpeg_P_lexer_pattern(u vc) {
u v2 = 0;
u v3 = 0;
u v4 = 0;
- zenter(vc, 120UL);
+ zenter(vc, 114UL);
v1 = zpeg_P_lexer_alternative(vc);
if (v1 == 0UL) goto b1;
b5: zchoice(vc);
@@ -28855,7 +29667,7 @@ b5: zchoice(vc);
if (v4 == 0UL) goto b6;
zcommit(vc);
goto b5;
-b6: zleave(vc, 120UL);
+b6: zleave(vc, 114UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28872,7 +29684,7 @@ u zpeg_P_lexer_primary(u vc) {
u v9 = 0;
u v10 = 0;
u v11 = 0;
- zenter(vc, 117UL);
+ zenter(vc, 111UL);
zchoice(vc);
v1 = zliteral(vc, (u)"(");
if (v1 == 0UL) goto b5;
@@ -28885,7 +29697,7 @@ u zpeg_P_lexer_primary(u vc) {
v5 = zpeg_P_sp(vc);
if (v5 == 0UL) goto b5;
b4: zcommit(vc);
- zleave(vc, 117UL);
+ zleave(vc, 111UL);
return 1UL;
b5: zchoice(vc);
v6 = zpeg_P_lexer_dot(vc);
@@ -28894,7 +29706,7 @@ b5: zchoice(vc);
if (v7 == 0UL) goto b17;
goto b4;
b17: zchoice(vc);
- v8 = zpeg_P_str(vc);
+ v8 = zpeg_P_lexer_str(vc);
if (v8 == 0UL) goto b23;
v9 = zpeg_P_sp(vc);
if (v9 == 0UL) goto b23;
@@ -28917,7 +29729,7 @@ u zpeg_P_lexer_rule(u vc) {
u v5 = 0;
u v6 = 0;
u v7 = 0;
- zenter(vc, 121UL);
+ zenter(vc, 115UL);
v1 = zpeg_P_ident(vc);
if (v1 == 0UL) goto b1;
v2 = zpeg_P_sp(vc);
@@ -28932,7 +29744,17 @@ u zpeg_P_lexer_rule(u vc) {
if (v6 == 0UL) goto b1;
v7 = zpeg_P_sp(vc);
if (v7 == 0UL) goto b1;
- zleave(vc, 121UL);
+ zleave(vc, 115UL);
+ return 1UL;
+b1: zfail(vc);
+ return 0UL;
+}
+u zpeg_P_lexer_str(u vc) {
+ u v1 = 0;
+ zenter(vc, 109UL);
+ v1 = zpeg_P_str(vc);
+ if (v1 == 0UL) goto b1;
+ zleave(vc, 109UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -28941,7 +29763,7 @@ u zpeg_P_lexer_suffix(u vc) {
u v1 = 0;
u v2 = 0;
u v3 = 0;
- zenter(vc, 118UL);
+ zenter(vc, 112UL);
v1 = zpeg_P_lexer_primary(vc);
if (v1 == 0UL) goto b1;
b5: zchoice(vc);
@@ -28951,7 +29773,7 @@ b5: zchoice(vc);
if (v3 == 0UL) goto b6;
zcommit(vc);
goto b5;
-b6: zleave(vc, 118UL);
+b6: zleave(vc, 112UL);
return 1UL;
b1: zfail(vc);
return 0UL;
@@ -31326,6 +32148,10 @@ u zreconstruct(u vc, u vpn) {
u v47 = 0;
u v48 = 0;
u v49 = 0;
+ u v50 = 0;
+ u v51 = 0;
+ u v52 = 0;
+ u v53 = 0;
v6 = (u)zassert;
v7 = (u)(*(u*)(vpn + 0UL) == 0UL);
v8 = (u)"grammar";
@@ -31344,17 +32170,17 @@ b6: if (*(u*)(vpn + 0UL) != 2UL) goto b12;
v13 = vpn;
v14 = ((u(*)())v11)(v12, v13);
vn = v14;
-b10: v41 = (u)zmknode1;
- v42 = vc;
- v43 = 14UL;
- v44 = vn;
- v45 = ((u(*)())v41)(v42, v43, v44);
- vp = v45;
- v46 = (u)zcopypos;
- v47 = vp;
- v48 = vpn;
- v49 = ((u(*)())v46)(v47, v48);
- v49;
+b10: v45 = (u)zmknode1;
+ v46 = vc;
+ v47 = 14UL;
+ v48 = vn;
+ v49 = ((u(*)())v45)(v46, v47, v48);
+ vp = v49;
+ v50 = (u)zcopypos;
+ v51 = vp;
+ v52 = vpn;
+ v53 = ((u(*)())v50)(v51, v52);
+ v53;
*(u*)vlink = vp;
vlink = vp + 16UL;
vpn = *(u*)(vpn + 8UL);
@@ -31384,29 +32210,33 @@ b21: if (*(u*)(vpn + 0UL) != 93UL) goto b24;
v27 = (u)zpeg_compile;
v28 = *(u*)(vc + 16UL);
v29 = vpn;
- v30 = *(u*)(vc + 24UL);
+ v30 = *(u*)(vc + 32UL);
v31 = ((u(*)())v27)(v28, v29, v30);
v31;
vpn = *(u*)(vpn + 8UL);
goto b2;
-b24: if (*(u*)(vpn + 0UL) != 122UL) goto b27;
- v32 = (u)zdie;
- v33 = (u)"lexer";
- v34 = ((u(*)())v32)(v33);
- v34;
+b24: if (*(u*)(vpn + 0UL) != 116UL) goto b27;
+ v32 = (u)zlexer_compile;
+ v33 = *(u*)(vc + 24UL);
+ v34 = vpn;
+ v35 = *(u*)(vc + 32UL);
+ v36 = ((u(*)())v32)(v33, v34, v35);
+ v36;
vpn = *(u*)(vpn + 8UL);
goto b2;
-b27: if (*(u*)(vpn + 0UL) != 113UL) goto b30;
- v35 = (u)zdie;
- v36 = (u)"lalr";
- v37 = ((u(*)())v35)(v36);
- v37;
+b27: if (*(u*)(vpn + 0UL) != 123UL) goto b30;
+ v37 = (u)zlalr_compiler;
+ v38 = *(u*)(vc + 24UL);
+ v39 = vpn;
+ v40 = *(u*)(vc + 32UL);
+ v41 = ((u(*)())v37)(v38, v39, v40);
+ v41;
vpn = *(u*)(vpn + 8UL);
goto b2;
-b30: v38 = (u)zdie;
- v39 = (u)"invalid decl";
- v40 = ((u(*)())v38)(v39);
- v40;
+b30: v42 = (u)zdie;
+ v43 = (u)"invalid decl";
+ v44 = ((u(*)())v42)(v43);
+ v44;
goto b10;
b8: v10 = 1UL;
goto b9;
@@ -34311,7 +35141,7 @@ u zsetup_parser(u vcc, u verr) {
u v17 = 0;
v3 = (u)zalloc;
v4 = *(u*)(vcc + 0UL);
- v5 = 32UL;
+ v5 = 40UL;
v6 = ((u(*)())v3)(v4, v5);
vc = v6;
*(u*)(vc + 0UL) = *(u*)(vcc + 0UL);
@@ -34328,7 +35158,8 @@ u zsetup_parser(u vcc, u verr) {
v16 = vcc;
v17 = ((u(*)())v15)(v16);
*(u*)(vc + 16UL) = v17;
- *(u*)(vc + 24UL) = verr;
+ *(u*)(vc + 32UL) = verr;
+ *(u*)(vc + 24UL) = vcc;
return vc;
}
u zsetup_peg(u vcc) {
@@ -40595,41 +41426,49 @@ b18: if (vch != 39UL) goto b20;
return 39UL;
b20: if (vch != 34UL) goto b22;
return 34UL;
-b22: if (vch != 120UL) goto b24;
- if ((s)*(u*)vi < (s)vlen) goto b27;
+b22: if (vch != 91UL) goto b24;
+ return 91UL;
+b24: if (vch != 93UL) goto b26;
+ return 93UL;
+b26: if (vch != 45UL) goto b28;
+ return 45UL;
+b28: if (vch != 94UL) goto b30;
+ return 94UL;
+b30: if (vch != 120UL) goto b32;
+ if ((s)*(u*)vi < (s)vlen) goto b35;
*(u*)vok = 0UL;
return 0UL;
-b27: vch = (u)*(b*)(vs + *(u*)vi * 1UL);
+b35: vch = (u)*(b*)(vs + *(u*)vi * 1UL);
*(u*)vi = *(u*)vi + 1UL;
v6 = (u)zhexdig;
v7 = vch;
v8 = vok;
v9 = ((u(*)())v6)(v7, v8);
vhex = v9 * 16UL;
- if (!*(u*)vok) goto b33;
+ if (!*(u*)vok) goto b41;
v10 = 0UL;
-b34: if (!v10) goto b31;
+b42: if (!v10) goto b39;
return 0UL;
-b31: if ((s)*(u*)vi < (s)vlen) goto b37;
+b39: if ((s)*(u*)vi < (s)vlen) goto b45;
*(u*)vok = 0UL;
return 0UL;
-b37: vch = (u)*(b*)(vs + *(u*)vi * 1UL);
+b45: vch = (u)*(b*)(vs + *(u*)vi * 1UL);
*(u*)vi = *(u*)vi + 1UL;
v11 = (u)zhexdig;
v12 = vch;
v13 = vok;
v14 = ((u(*)())v11)(v12, v13);
vhex = vhex | v14;
- if (!*(u*)vok) goto b43;
+ if (!*(u*)vok) goto b51;
v15 = 0UL;
-b44: if (!v15) goto b41;
+b52: if (!v15) goto b49;
return 0UL;
-b41: return vhex;
-b43: v15 = 1UL;
- goto b44;
-b33: v10 = 1UL;
- goto b34;
-b24: *(u*)vok = 0UL;
+b49: return vhex;
+b51: v15 = 1UL;
+ goto b52;
+b41: v10 = 1UL;
+ goto b42;
+b32: *(u*)vok = 0UL;
return 0UL;
}
u zunify(u vc, u va, u vb) {
diff --git a/cc3.om b/cc3.om
@@ -1,5 +1,5 @@
peg_grammar {
- grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lalr_grammar / lexer_grammar)* !.;
+ grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lexer_grammar / lalr_grammar)* !.;
enum_item = ident sp ("=" sp expr)?;
enum_decl = enum sp "{" sp (enum_item ("," sp enum_item)*)? ("," sp)? "}" sp;
@@ -149,21 +149,22 @@ peg_grammar {
peg_call = peg_identifier !(sp "=");
peg_identifier = [[a-zA-Z0-9_]]+;
- lalr_primary = "(" sp lalr_pattern ")" sp / ident sp;
- lalr_op = "*" / "+" / "?";
- lalr_suffix = lalr_primary (lalr_op sp)*;
- lalr_alternative = lalr_suffix*;
- lalr_pattern = lalr_alternative ("|" sp lalr_alternative)*;
- lalr_rule = ident sp "=" sp lalr_pattern ";" sp;
- lalr_grammar = "lalr" sp "{" sp lalr_rule+ "}" sp;
-
lexer_dot = ".";
lexer_op = "*" / "+" / "?";
+ lexer_str = str;
lexer_charset = "[[" ( !"]" !"\\" . / "\\" . )* "]]";
- lexer_primary = "(" sp lexer_pattern ")" sp / lexer_dot sp / str sp / lexer_charset sp;
+ lexer_primary = "(" sp lexer_pattern ")" sp / lexer_dot sp / lexer_str sp / lexer_charset sp;
lexer_suffix = lexer_primary (lexer_op sp)*;
lexer_alternative = lexer_suffix*;
lexer_pattern = lexer_alternative ("|" sp lexer_alternative)*;
lexer_rule = ident sp "=" sp lexer_pattern ";" sp;
lexer_grammar = "lexer" sp "{" sp lexer_rule+ "}" sp;
+
+ lalr_primary = "(" sp lalr_pattern ")" sp / ident sp;
+ lalr_op = "*" / "+" / "?";
+ lalr_suffix = lalr_primary (lalr_op sp)*;
+ lalr_alternative = lalr_suffix*;
+ lalr_pattern = lalr_alternative ("|" sp lalr_alternative)*;
+ lalr_rule = ident sp "=" sp lalr_pattern ";" sp;
+ lalr_grammar = "lalr" sp "{" sp lalr_rule+ "}" sp;
}
diff --git a/lexer.om b/lexer.om
@@ -1,3 +1,415 @@
+// https://swtch.com/~rsc/regexp/regexp1.html
+
+struct nfa {
+ tag: int;
+ ch: int;
+ out: *nfa;
+ alt: *nfa;
+ tail: **nfa;
+ ntail: int;
+}
+
+func nfa_concat(c: *compiler, a: *nfa, b:* nfa): *nfa {
+ var i: int;
+
+ if !a {
+ return nil;
+ }
+
+ if !b {
+ return nil;
+ }
+
+ i = 0;
+ loop {
+ if i == a.ntail {
+ break;
+ }
+
+ a.tail[i].out = b;
+
+ i = i + 1;
+ }
+
+ free(c.a, a.tail as *byte);
+
+ a.tail = b.tail;
+ a.ntail = b.ntail;
+
+ return a;
+}
+
+func nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
+ var ret: *nfa;
+ var n: *nfa;
+ var tail: **nfa;
+ var i: int;
+ var j: int;
+
+ if !a {
+ return b;
+ }
+
+ if !b {
+ return a;
+ }
+
+ n = alloc(c.a, sizeof(*n)) as *nfa;
+ n.ch = -1;
+ n.out = a;
+ n.alt = b;
+ n.ntail = a.ntail + b.ntail;
+ n.tail = alloc(c.a, sizeof(*tail) * n.ntail) as **nfa;
+
+ i = 0;
+ j = 0;
+ loop {
+ if i == a.ntail {
+ break;
+ }
+
+ n.tail[j] = a.tail[i];
+
+ j = j + 1;
+ i = i + 1;
+ }
+
+ loop {
+ if i == b.ntail {
+ break;
+ }
+
+ n.tail[j] = b.tail[i];
+
+ j = j + 1;
+ i = i + 1;
+ }
+
+ free(c.a, a.tail as *byte);
+ free(c.a, b.tail as *byte);
+
+ return ret;
+}
+
+func nfa_plus(c: *compiler, a: *nfa): *nfa {
+ var b: *nfa;
+ if !a {
+ return nil;
+ }
+ b = alloc(c.a, sizeof(*b)) as *nfa;
+ b.ch = -1;
+ b.out = nil;
+ b.alt = a;
+ b.tail = alloc(c.a, sizeof(*b.tail)) as **nfa;
+ b.ntail = 1;
+ b.tail[0] = b;
+ return nfa_concat(c, a, b);
+}
+
+func nfa_qmark(c: *compiler, a: *nfa): *nfa {
+ var b: *nfa;
+ var i: int;
+ if !a {
+ return nfa_empty(c);
+ }
+ b = alloc(c.a, sizeof(*b)) as *nfa;
+ b.ch = -1;
+ b.out = nil;
+ b.alt = a;
+ b.ntail = a.ntail + 1;
+ b.tail = alloc(c.a, sizeof(*b.tail) * b.ntail) as **nfa;
+ loop {
+ if i == a.ntail {
+ break;
+ }
+ b.tail[i] = a.tail[i];
+ i = i + 1;
+ }
+ b.tail[i] = b;
+ free(c.a, a.tail as *byte);
+ return b;
+}
+
+func nfa_star(c: *compiler, a: *nfa): *nfa {
+ a = nfa_plus(c, a);
+ return nfa_qmark(c, a);
+}
+
+func nfa_lit(c: *compiler, ch: int): *nfa {
+ var n: *nfa;
+ n = alloc(c.a, sizeof(*n)) as *nfa;
+ n.out = nil;
+ n.ch = ch;
+ n.tail = alloc(c.a, sizeof(*n.tail)) as **nfa;
+ n.ntail = 1;
+ n.tail[0] = n;
+ return n;
+}
+
+func nfa_empty(c: *compiler): *nfa {
+ var n: *nfa;
+ n = alloc(c.a, sizeof(*n)) as *nfa;
+ n.ch = -1;
+ n.out = nil;
+ n.alt = nil;
+ n.tail = alloc(c.a, sizeof(*n.tail)) as **nfa;
+ n.ntail = 1;
+ n.tail[0] = n;
+ return n;
+}
+
+func nfa_any(c: *compiler): *nfa {
+ return nfa_lit(c, -1);
+}
+
func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
- die("lexer");
+ var a: *nfa;
+ var b: *nfa;
+
+ pn = pn.child;
+ loop {
+ if !pn {
+ break;
+ }
+
+ b = lexer_compile_rule(c, pn);
+ a = nfa_alt(c, a, b);
+
+ pn = pn.next;
+ }
+}
+
+func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa {
+ var ident: *peg_node;
+ var pat: *peg_node;
+ var a: *nfa;
+ var b: *nfa;
+
+ ident = pn.child;
+ pat = ident.next;
+
+ a = lexer_compile_pattern(c, pat);
+ b = nfa_empty(c);
+ b.tag = 1;
+
+ return nfa_concat(c, a, b);
+}
+
+func lexer_compile_pattern(c: *compiler, pn: *peg_node): *nfa {
+ var alt: *peg_node;
+ var a: *nfa;
+ var b: *nfa;
+
+ alt = pn.child;
+ a = nil;
+ loop {
+ if !alt {
+ break;
+ }
+
+ b = lexer_compile_alternative(c, alt);
+
+ a = nfa_alt(c, a, b);
+
+ alt = alt.next;
+ }
+
+ return a;
+}
+
+func lexer_compile_alternative(c: *compiler, pn: *peg_node): *nfa {
+ var part: *peg_node;
+ var a: *nfa;
+ var b: *nfa;
+
+ part = pn.child;
+ a = nfa_empty(c);
+ loop {
+ if !part {
+ break;
+ }
+
+ b = lexer_compile_suffix(c, part);
+
+ a = nfa_concat(c, a, b);
+
+ part = part.next;
+ }
+
+ return a;
+}
+
+func lexer_compile_suffix(c: *compiler, pn: *peg_node): *nfa {
+ var primary: *peg_node;
+ var op: *peg_node;
+ var n: *nfa;
+ var ch: int;
+ var zero: int;
+ var more: int;
+
+ primary = pn.child;
+
+ n = lexer_compile_primary(c, primary);
+
+ zero = 0;
+ more = 0;
+
+ op = primary.next;
+ loop {
+ if !op {
+ break;
+ }
+
+ ch = op.str[0] as int;
+
+ if ch == '*' {
+ zero = 1;
+ more = 1;
+ } else if ch == '+' {
+ more = 1;
+ } else if ch == '?' {
+ zero = 1;
+ }
+
+ op = op.next;
+ }
+
+ if zero && more {
+ return nfa_star(c, n);
+ } else if zero {
+ return nfa_qmark(c, n);
+ } else if more {
+ return nfa_plus(c, n);
+ } else {
+ return n;
+ }
+}
+
+func lexer_compile_primary(c: *compiler, pn: *peg_node): *nfa {
+ var tag: int;
+
+ pn = pn.child;
+ tag = pn.tag;
+
+ if tag == P_lexer_pattern {
+ return lexer_compile_pattern(c, pn);
+ } else if tag == P_lexer_dot {
+ return lexer_compile_any(c, pn);
+ } else if tag == P_lexer_str {
+ return lexer_compile_str(c, pn);
+ } else if tag == P_lexer_charset {
+ return lexer_compile_charset(c, pn);
+ } else {
+ die("invalid lexer_primary");
+ return nil;
+ }
+}
+
+func lexer_compile_any(c: *compiler, pn: *peg_node): *nfa {
+ return nfa_any(c);
+}
+
+func lexer_compile_str(c: *compiler, pn: *peg_node): *nfa {
+ var i: int;
+ var len: int;
+ var ok: int;
+ var ch: int;
+ var a: *nfa;
+ var b: *nfa;
+
+ i = 1;
+ len = pn.len - 1;
+ a = nfa_empty(c);
+ loop {
+ if i == len {
+ break;
+ }
+ ch = unescape(pn.str, &i, len, &ok);
+ if !ok {
+ die("lexer_compile_str: invalid escape");
+ }
+ b = nfa_lit(c, ch);
+ a = nfa_concat(c, a, b);
+ }
+ return a;
+}
+
+func lexer_compile_charset(c: *compiler, pn: *peg_node): *nfa {
+ var i: int;
+ var j: int;
+ var len: int;
+ var ch: int;
+ var x: int;
+ var ok: int;
+ var s: *byte;
+ var scratch: *byte;
+ var start: int;
+ var end: int;
+ var a: *nfa;
+ var b: *nfa;
+
+ scratch = alloc(c.a, 256);
+
+ s = pn.str;
+ i = 2;
+ len = pn.len - 2;
+ x = 1;
+ loop {
+ if i == len {
+ break;
+ }
+
+ ch = s[i] as int;
+ if ch == '\\' {
+ ch = unescape(s, &i, len, &ok);
+ if !ok {
+ die("lexer_compile_charset: invalid escape");
+ }
+ } else if ch == '^' {
+ x = 0;
+ i = i + 1;
+ continue;
+ } else {
+ i = i + 1;
+ }
+
+ start = ch;
+ end = ch;
+
+ if i != len && (s[i] as int) == '-' {
+ i = i + 1;
+ ch = unescape(s, &i, len, &ok);
+ if !ok {
+ die("lexer_compile_charset: invalid escape");
+ }
+ end = ch;
+ }
+
+ j = start;
+ loop {
+ if j > end {
+ break;
+ }
+
+ scratch[j] = x as byte;
+
+ j = j + 1;
+ }
+ }
+
+ j = 0;
+ loop {
+ if j == 256 {
+ break;
+ }
+ if scratch[j] {
+ b = nfa_lit(c, j);
+ a = nfa_alt(c, a, b);
+ }
+ j = j + 1;
+ }
+
+ free(c.a, scratch);
+
+ return a;
}
diff --git a/lib.om b/lib.om
@@ -463,6 +463,14 @@ func unescape(s: *byte, i: *int, len: int, ok: *int): int {
return '\'';
} else if ch == '\"' {
return '\"';
+ } else if ch == '[' {
+ return '[';
+ } else if ch == ']' {
+ return ']';
+ } else if ch == '-' {
+ return '-';
+ } else if ch == '^' {
+ return '^';
} else if ch == 'x' {
if *i >= len {
*ok = 0;