commit 096147389d9be724b7aed58cc12e0331a6614f07
parent f9c585acac4425598b1b0b0e739d1a33c22537c3
Author: erai <erai@omiltem.net>
Date: Sun, 6 Apr 2025 15:27:27 +0000
powerset construction
Diffstat:
M | cc0.c | | | 762 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
M | cc1.om | | | 4 | ++++ |
M | cc4.om | | | 2 | +- |
M | lexer.om | | | 455 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
4 files changed, 1058 insertions(+), 165 deletions(-)
diff --git a/cc0.c b/cc0.c
@@ -48,6 +48,10 @@ u zdefine_ir_func();
u zdefstruct();
u zdefun();
u zdefunion();
+u zdfa_cmp();
+u zdfakey_add();
+u zdfakey_clear();
+u zdfakey_sort();
u zdie();
u zemit();
u zemit_align();
@@ -143,6 +147,7 @@ u zlexer_compile_primary();
u zlexer_compile_rule();
u zlexer_compile_str();
u zlexer_compile_suffix();
+u zlexer_explode();
u zliteral();
u zlocals_to_ir();
u zmain();
@@ -177,6 +182,7 @@ u zmktype_struct();
u zmktype_union();
u zmmap();
u znext_decl();
+u znfa2dfa();
u znfa_alt();
u znfa_any();
u znfa_concat();
@@ -2833,7 +2839,7 @@ u zcomp_setup(u va, u verr) {
u v13 = 0;
v3 = (u)zalloc;
v4 = va;
- v5 = 112UL;
+ v5 = 136UL;
v6 = ((u(*)())v3)(v4, v5);
vc = v6;
*(u*)(vc + 0UL) = va;
@@ -4019,6 +4025,178 @@ b18: v11 = 1UL;
b9: v5 = 1UL;
goto b10;
}
+u zdfa_cmp(u vk, u vd) {
+ u vi = 0;
+ u v3 = 0;
+ vi = 0UL;
+b1: if (vi != *(u*)(vk + 16UL)) goto b7;
+ if (vi != *(u*)(vd + 56UL)) goto b7;
+ v3 = 1UL;
+b9: if (!v3) goto b5;
+ return 0UL;
+b5: if (vi != *(u*)(vk + 16UL)) goto b12;
+ return -1UL;
+b12: if (vi != *(u*)(vd + 56UL)) goto b15;
+ return 1UL;
+b15: if ((s)*(u*)(*(u*)(*(u*)(vk + 8UL) + vi * 8UL) + 0UL) <= (s)*(u*)(*(u*)(*(u*)(vd + 48UL) + vi * 8UL) + 0UL)) goto b18;
+ return 1UL;
+b18: if ((s)*(u*)(*(u*)(*(u*)(vk + 8UL) + vi * 8UL) + 0UL) >= (s)*(u*)(*(u*)(*(u*)(vd + 48UL) + vi * 8UL) + 0UL)) goto b21;
+ return -1UL;
+b21: vi = vi + 1UL;
+ goto b1;
+b7: v3 = 0UL;
+ goto b9;
+}
+u zdfakey_add(u vk, u vn) {
+ u vtmp = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ u v30 = 0;
+ if (!vn) goto b9;
+ v4 = 0UL;
+b10: if (!v4) goto b5;
+ v3 = 1UL;
+b7: if (!v3) goto b3;
+ return 0UL;
+b3: *(u*)(vn + 64UL) = 1UL;
+ if (!*(u*)(vk + 32UL)) goto b20;
+ v6 = 0UL;
+b21: if (!v6) goto b16;
+ v5 = 1UL;
+b18: if (!v5) goto b14;
+ *(u*)(vk + 32UL) = *(u*)(vn + 8UL);
+b12: if (*(u*)(vk + 16UL) != *(u*)(vk + 24UL)) goto b29;
+ if (*(u*)(vk + 24UL) != 0UL) goto b32;
+ *(u*)(vk + 24UL) = 16UL;
+b30: v8 = (u)zalloc;
+ v9 = *(u*)(vk + 0UL);
+ v10 = 8UL * *(u*)(vk + 24UL);
+ v11 = ((u(*)())v8)(v9, v10);
+ vtmp = v11;
+ v12 = (u)zmemcpy;
+ v13 = vtmp;
+ v14 = *(u*)(vk + 8UL);
+ v15 = 8UL * *(u*)(vk + 16UL);
+ v16 = ((u(*)())v12)(v13, v14, v15);
+ v16;
+ v17 = (u)zfree;
+ v18 = *(u*)(vk + 0UL);
+ v19 = *(u*)(vk + 8UL);
+ v20 = ((u(*)())v17)(v18, v19);
+ v20;
+ *(u*)(vk + 8UL) = vtmp;
+b27: *(u*)(*(u*)(vk + 8UL) + *(u*)(vk + 16UL) * 8UL) = vn;
+ *(u*)(vk + 16UL) = *(u*)(vk + 16UL) + 1UL;
+ if (!*(u*)(vn + 32UL)) goto b40;
+ if (*(u*)(*(u*)(vn + 32UL) + 16UL) != -1UL) goto b40;
+ v21 = 1UL;
+b42: if (!v21) goto b38;
+ v22 = (u)zdfakey_add;
+ v23 = vk;
+ v24 = *(u*)(vn + 32UL);
+ v25 = ((u(*)())v22)(v23, v24);
+ v25;
+b36: if (!*(u*)(vn + 40UL)) goto b48;
+ if (*(u*)(*(u*)(vn + 40UL) + 16UL) != -1UL) goto b48;
+ v26 = 1UL;
+b50: if (!v26) goto b46;
+ v27 = (u)zdfakey_add;
+ v28 = vk;
+ v29 = *(u*)(vn + 40UL);
+ v30 = ((u(*)())v27)(v28, v29);
+ v30;
+b44: return 0UL;
+b46: goto b44;
+b48: v26 = 0UL;
+ goto b50;
+b38: goto b36;
+b40: v21 = 0UL;
+ goto b42;
+b32: *(u*)(vk + 24UL) = *(u*)(vk + 24UL) * 2UL;
+ goto b30;
+b29: goto b27;
+b14: goto b12;
+b16: if (!*(u*)(vn + 8UL)) goto b24;
+ if ((s)*(u*)(vn + 8UL) >= (s)*(u*)(vk + 32UL)) goto b24;
+ v7 = 1UL;
+b26: if (!v7) goto b22;
+ v5 = 1UL;
+ goto b18;
+b22: v5 = 0UL;
+ goto b18;
+b24: v7 = 0UL;
+ goto b26;
+b20: v6 = 1UL;
+ goto b21;
+b5: if (!*(u*)(vn + 64UL)) goto b11;
+ v3 = 1UL;
+ goto b7;
+b11: v3 = 0UL;
+ goto b7;
+b9: v4 = 1UL;
+ goto b10;
+}
+u zdfakey_clear(u vk) {
+ u vi = 0;
+ vi = 0UL;
+b1: if (vi != *(u*)(vk + 16UL)) goto b5;
+ *(u*)(vk + 32UL) = 0UL;
+ *(u*)(vk + 16UL) = 0UL;
+ return 0UL;
+b5: *(u*)(*(u*)(*(u*)(vk + 8UL) + vi * 8UL) + 64UL) = 0UL;
+ vi = vi + 1UL;
+ goto b1;
+}
+u zdfakey_sort(u vk) {
+ u vi = 0;
+ u vj = 0;
+ u vtmp = 0;
+ u v4 = 0;
+ vi = 1UL;
+b1: if ((s)vi < (s)*(u*)(vk + 16UL)) goto b5;
+ return 0UL;
+b5: vj = vi;
+ vtmp = *(u*)(*(u*)(vk + 8UL) + vj * 8UL);
+b6: if (vj != 0UL) goto b12;
+ v4 = 1UL;
+b14: if (!v4) goto b10;
+ *(u*)(*(u*)(vk + 8UL) + vj * 8UL) = vtmp;
+ vi = vi + 1UL;
+ goto b1;
+b10: *(u*)(*(u*)(vk + 8UL) + vj * 8UL) = *(u*)(*(u*)(vk + 8UL) + (vj - 1UL) * 8UL);
+ vj = vj - 1UL;
+ goto b6;
+b12: if ((s)*(u*)(*(u*)(*(u*)(vk + 8UL) + (vj - 1UL) * 8UL) + 0UL) > (s)*(u*)(vtmp + 0UL)) goto b15;
+ v4 = 1UL;
+ goto b14;
+b15: v4 = 0UL;
+ goto b14;
+}
u zdie(u vmsg) {
u vlen = 0;
u v2 = 0;
@@ -22758,10 +22936,19 @@ u zlexer_compile(u vc, u vpn, u verr) {
u v12 = 0;
u v13 = 0;
u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
vpn = *(u*)(vpn + 16UL);
b1: if (!vpn) goto b7;
v5 = 0UL;
b8: if (!v5) goto b5;
+ v15 = (u)zlexer_explode;
+ v16 = vc;
+ v17 = va;
+ v18 = ((u(*)())v15)(v16, v17);
+ v18;
return 0UL;
b5: v6 = (u)zlexer_compile_rule;
v7 = vc;
@@ -22881,6 +23068,10 @@ u zlexer_compile_charset(u vc, u vpn) {
u v49 = 0;
u v50 = 0;
u v51 = 0;
+ u v52 = 0;
+ u v53 = 0;
+ u v54 = 0;
+ u v55 = 0;
v14 = (u)zalloc;
v15 = *(u*)(vc + 0UL);
v16 = 256UL;
@@ -22890,32 +23081,58 @@ u zlexer_compile_charset(u vc, u vpn) {
vi = 2UL;
vlen = *(u*)(vpn + 32UL) - 2UL;
vx = 1UL;
-b2: if (vi != vlen) goto b6;
+ if ((u)*(b*)(vs + vi * 1UL) != 94UL) goto b4;
+ vx = 0UL;
vj = 0UL;
-b40: if (vj != 256UL) goto b44;
- v48 = (u)zfree;
- v49 = *(u*)(vc + 0UL);
- v50 = vscratch;
- v51 = ((u(*)())v48)(v49, v50);
- v51;
+b5: if (vj != 256UL) goto b9;
+ vi = vi + 1UL;
+b2:b10: if (vi != vlen) goto b14;
+ vj = 0UL;
+b48: if (vj != 256UL) goto b52;
+ v52 = (u)zfree;
+ v53 = *(u*)(vc + 0UL);
+ v54 = vscratch;
+ v55 = ((u(*)())v52)(v53, v54);
+ v55;
return va;
-b44: if (!(u)*(b*)(vscratch + vj * 1UL)) goto b47;
- v39 = (u)znfa_lit;
- v40 = vc;
- v41 = vj;
- v42 = ((u(*)())v39)(v40, v41);
- vb = v42;
- v43 = (u)znfa_alt;
- v44 = vc;
- v45 = va;
- v46 = vb;
- v47 = ((u(*)())v43)(v44, v45, v46);
- va = v47;
-b45: vj = vj + 1UL;
- goto b40;
-b47: goto b45;
-b6: vch = (u)*(b*)(vs + vi * 1UL);
- if (vch != 92UL) goto b9;
+b52: if (!(u)*(b*)(vscratch + vj * 1UL)) goto b57;
+ v39 = 0UL;
+b58: if (!v39) goto b55;
+ vj = vj + 1UL;
+ goto b48;
+b55: vstart = vj;
+b59: if (vj != 256UL) goto b65;
+ v40 = 1UL;
+b67: if (!v40) goto b63;
+ vend = vj - 1UL;
+ v42 = (u)znfa_lit;
+ v43 = vc;
+ v44 = vstart;
+ v45 = vend;
+ v46 = ((u(*)())v42)(v43, v44, v45);
+ vb = v46;
+ v47 = (u)znfa_alt;
+ v48 = vc;
+ v49 = va;
+ v50 = vb;
+ v51 = ((u(*)())v47)(v48, v49, v50);
+ va = v51;
+ goto b48;
+b63: vj = vj + 1UL;
+ goto b59;
+b65: if (!(u)*(b*)(vscratch + vj * 1UL)) goto b70;
+ v41 = 0UL;
+b71: if (!v41) goto b68;
+ v40 = 1UL;
+ goto b67;
+b68: v40 = 0UL;
+ goto b67;
+b70: v41 = 1UL;
+ goto b71;
+b57: v39 = 1UL;
+ goto b58;
+b14: vch = (u)*(b*)(vs + vi * 1UL);
+ if (vch != 92UL) goto b17;
v18 = (u)zunescape;
v19 = vs;
v20 = (u)&vi;
@@ -22923,19 +23140,19 @@ b6: vch = (u)*(b*)(vs + vi * 1UL);
v22 = (u)&vok;
v23 = ((u(*)())v18)(v19, v20, v21, v22);
vch = v23;
- if (!vok) goto b15;
+ if (!vok) goto b23;
v24 = 0UL;
-b16: if (!v24) goto b13;
+b24: if (!v24) goto b21;
v25 = (u)zdie;
v26 = (u)"lexer_compile_charset: invalid escape";
v27 = ((u(*)())v25)(v26);
v27;
-b11:b7: vstart = vch;
+b19:b15: vstart = vch;
vend = vch;
- if (vi == vlen) goto b24;
- if ((u)*(b*)(vs + vi * 1UL) != 45UL) goto b24;
+ if (vi == vlen) goto b32;
+ if ((u)*(b*)(vs + vi * 1UL) != 45UL) goto b32;
v28 = 1UL;
-b26: if (!v28) goto b22;
+b34: if (!v28) goto b30;
vi = vi + 1UL;
v29 = (u)zunescape;
v30 = vs;
@@ -22944,35 +23161,39 @@ b26: if (!v28) goto b22;
v33 = (u)&vok;
v34 = ((u(*)())v29)(v30, v31, v32, v33);
vch = v34;
- if (!vok) goto b32;
+ if (!vok) goto b40;
v35 = 0UL;
-b33: if (!v35) goto b30;
+b41: if (!v35) goto b38;
v36 = (u)zdie;
v37 = (u)"lexer_compile_charset: invalid escape";
v38 = ((u(*)())v36)(v37);
v38;
-b28: vend = vch;
-b20: vj = vstart;
-b35: if ((s)vj <= (s)vend) goto b39;
- goto b2;
-b39: *(b*)(vscratch + vj * 1UL) = vx;
+b36: vend = vch;
+b28: vj = vstart;
+b43: if ((s)vj <= (s)vend) goto b47;
+ goto b10;
+b47: *(b*)(vscratch + vj * 1UL) = vx;
vj = vj + 1UL;
- goto b35;
+ goto b43;
+b38: goto b36;
+b40: v35 = 1UL;
+ goto b41;
b30: goto b28;
-b32: v35 = 1UL;
- goto b33;
-b22: goto b20;
-b24: v28 = 0UL;
- goto b26;
-b13: goto b11;
-b15: v24 = 1UL;
- goto b16;
-b9: if (vch != 94UL) goto b19;
+b32: v28 = 0UL;
+ goto b34;
+b21: goto b19;
+b23: v24 = 1UL;
+ goto b24;
+b17: if (vch != 94UL) goto b27;
vx = 0UL;
vi = vi + 1UL;
- goto b2;
-b19: vi = vi + 1UL;
- goto b7;
+ goto b10;
+b27: vi = vi + 1UL;
+ goto b15;
+b9: *(b*)(vscratch + vj * 1UL) = 1UL;
+ vj = vj + 1UL;
+ goto b5;
+b4: goto b2;
}
u zlexer_compile_pattern(u vc, u vpn) {
u valt = 0;
@@ -23091,7 +23312,7 @@ u zlexer_compile_rule(u vc, u vpn) {
v11 = vc;
v12 = ((u(*)())v10)(v11);
vb = v12;
- *(u*)(vb + 0UL) = 1UL;
+ *(u*)(vb + 8UL) = 1UL;
v13 = (u)znfa_concat;
v14 = vc;
v15 = va;
@@ -23128,6 +23349,7 @@ u zlexer_compile_str(u vc, u vpn) {
u v27 = 0;
u v28 = 0;
u v29 = 0;
+ u v30 = 0;
vi = 1UL;
vlen = *(u*)(vpn + 32UL) - 1UL;
v8 = (u)znfa_empty;
@@ -23153,14 +23375,15 @@ b13: if (!v17) goto b10;
b8: v21 = (u)znfa_lit;
v22 = vc;
v23 = vch;
- v24 = ((u(*)())v21)(v22, v23);
- vb = v24;
- v25 = (u)znfa_concat;
- v26 = vc;
- v27 = va;
- v28 = vb;
- v29 = ((u(*)())v25)(v26, v27, v28);
- va = v29;
+ v24 = vch;
+ v25 = ((u(*)())v21)(v22, v23, v24);
+ vb = v25;
+ v26 = (u)znfa_concat;
+ v27 = vc;
+ v28 = va;
+ v29 = vb;
+ v30 = ((u(*)())v26)(v27, v28, v29);
+ va = v30;
goto b2;
b10: goto b8;
b12: v17 = 1UL;
@@ -23243,6 +23466,50 @@ b16: goto b10;
b8: v12 = 1UL;
goto b9;
}
+u zlexer_explode(u vc, u vn) {
+ u v_k[5] = {0};
+ u vk = 0;
+ u vret = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ vk = (u)v_k;
+ *(u*)(vk + 0UL) = *(u*)(vc + 0UL);
+ if (*(u*)(vn + 16UL) == -1UL) goto b3;
+ v5 = (u)zdie;
+ v6 = (u)"expected epsilon";
+ v7 = ((u(*)())v5)(v6);
+ v7;
+b1: v8 = (u)zdfakey_add;
+ v9 = vk;
+ v10 = vn;
+ v11 = ((u(*)())v8)(v9, v10);
+ v11;
+ v12 = (u)znfa2dfa;
+ v13 = vc;
+ v14 = vk;
+ v15 = ((u(*)())v12)(v13, v14);
+ vret = v15;
+ v16 = (u)zfree;
+ v17 = *(u*)(vk + 0UL);
+ v18 = *(u*)(vk + 8UL);
+ v19 = ((u(*)())v16)(v17, v18);
+ v19;
+ return vret;
+b3: goto b1;
+}
u zliteral(u vc, u vs) {
u vi = 0;
u vch = 0;
@@ -24490,12 +24757,176 @@ u znext_decl(u vc, u vd) {
v4 = ((u(*)())v2)(v3);
return v4;
}
+u znfa2dfa(u vc, u vk) {
+ u vd = 0;
+ u vp = 0;
+ u vlink = 0;
+ u vdir = 0;
+ u vi = 0;
+ u vj = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ u v30 = 0;
+ u v31 = 0;
+ u v32 = 0;
+ u v33 = 0;
+ u v34 = 0;
+ u v35 = 0;
+ u v36 = 0;
+ u v37 = 0;
+ u v38 = 0;
+ u v39 = 0;
+ u v40 = 0;
+ u v41 = 0;
+ u v42 = 0;
+ u v43 = 0;
+ u v44 = 0;
+ u v45 = 0;
+ u v46 = 0;
+ u v47 = 0;
+ u v48 = 0;
+ u v49 = 0;
+ u v50 = 0;
+ u v51 = 0;
+ u v52 = 0;
+ if (*(u*)(vk + 16UL) != 0UL) goto b3;
+ return 0UL;
+b3: v8 = (u)zdfakey_sort;
+ v9 = vk;
+ v10 = ((u(*)())v8)(v9);
+ v10;
+ vp = 0UL;
+ vlink = vc + 128UL;
+b5: vd = *(u*)vlink;
+ if (!vd) goto b11;
+ v11 = 0UL;
+b12: if (!v11) goto b9;
+ v16 = (u)zalloc;
+ v17 = *(u*)(vc + 0UL);
+ v18 = 80UL;
+ v19 = ((u(*)())v16)(v17, v18);
+ vd = v19;
+ v20 = (u)zalloc;
+ v21 = *(u*)(vc + 0UL);
+ v22 = 8UL * 256UL;
+ v23 = ((u(*)())v20)(v21, v22);
+ *(u*)(vd + 40UL) = v23;
+ v24 = (u)zalloc;
+ v25 = *(u*)(vc + 0UL);
+ v26 = 8UL * *(u*)(vk + 16UL);
+ v27 = ((u(*)())v24)(v25, v26);
+ *(u*)(vd + 48UL) = v27;
+ *(u*)(vd + 56UL) = *(u*)(vk + 16UL);
+ *(u*)(vd + 32UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
+ vi = 0UL;
+b22: if (vi != *(u*)(vk + 16UL)) goto b26;
+ *(u*)(vd + 72UL) = *(u*)(vk + 32UL);
+ v28 = (u)zrb_link;
+ v29 = vc + 128UL;
+ v30 = vlink;
+ v31 = vp;
+ v32 = vd + 0UL;
+ v33 = ((u(*)())v28)(v29, v30, v31, v32);
+ v33;
+ vi = 0UL;
+b28: if (vi != 256UL) goto b32;
+ return vd;
+b32: v34 = (u)zdfakey_clear;
+ v35 = vk;
+ v36 = ((u(*)())v34)(v35);
+ v36;
+ vj = 0UL;
+b34: if (vj != *(u*)(vd + 56UL)) goto b38;
+ v49 = (u)znfa2dfa;
+ v50 = vc;
+ v51 = vk;
+ v52 = ((u(*)())v49)(v50, v51);
+ *(u*)(*(u*)(vd + 40UL) + vi * 8UL) = v52;
+ vi = vi + 1UL;
+ goto b28;
+b38: if (!*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 32UL)) goto b43;
+ if ((s)*(u*)(*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 32UL) + 16UL) > (s)vi) goto b47;
+ if ((s)*(u*)(*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 32UL) + 24UL) < (s)vi) goto b47;
+ v38 = 1UL;
+b49: if (!v38) goto b43;
+ v37 = 1UL;
+b45: if (!v37) goto b41;
+ v39 = (u)zdfakey_add;
+ v40 = vk;
+ v41 = *(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 32UL);
+ v42 = ((u(*)())v39)(v40, v41);
+ v42;
+b39: if (!*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 40UL)) goto b55;
+ if ((s)*(u*)(*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 40UL) + 16UL) > (s)vi) goto b59;
+ if ((s)*(u*)(*(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 40UL) + 24UL) < (s)vi) goto b59;
+ v44 = 1UL;
+b61: if (!v44) goto b55;
+ v43 = 1UL;
+b57: if (!v43) goto b53;
+ v45 = (u)zdfakey_add;
+ v46 = vk;
+ v47 = *(u*)(*(u*)(*(u*)(vd + 48UL) + vj * 8UL) + 40UL);
+ v48 = ((u(*)())v45)(v46, v47);
+ v48;
+b51: vj = vj + 1UL;
+ goto b34;
+b53: goto b51;
+b55: v43 = 0UL;
+ goto b57;
+b59: v44 = 0UL;
+ goto b61;
+b41: goto b39;
+b43: v37 = 0UL;
+ goto b45;
+b47: v38 = 0UL;
+ goto b49;
+b26: *(u*)(*(u*)(vd + 48UL) + vi * 8UL) = *(u*)(*(u*)(vk + 8UL) + vi * 8UL);
+ vi = vi + 1UL;
+ goto b22;
+b9: v12 = (u)zdfa_cmp;
+ v13 = vk;
+ v14 = vd;
+ v15 = ((u(*)())v12)(v13, v14);
+ vdir = v15;
+ if ((s)vdir >= (s)0UL) goto b16;
+ vlink = vd + 0UL + 16UL;
+ vp = vd + 0UL;
+b14: goto b5;
+b16: if ((s)vdir <= (s)0UL) goto b18;
+ vlink = vd + 0UL + 24UL;
+ vp = vd + 0UL;
+ goto b14;
+b18: return vd;
+b11: v11 = 1UL;
+ goto b12;
+}
u znfa_alt(u vc, u va, u vb) {
- u vret = 0;
u vn = 0;
u vtail = 0;
u vi = 0;
u vj = 0;
+ u v7 = 0;
u v8 = 0;
u v9 = 0;
u v10 = 0;
@@ -24513,67 +24944,88 @@ u znfa_alt(u vc, u va, u vb) {
u v22 = 0;
u v23 = 0;
u v24 = 0;
- u v25 = 0;
if (!va) goto b5;
- v8 = 0UL;
-b6: if (!v8) goto b3;
+ v7 = 0UL;
+b6: if (!v7) goto b3;
return vb;
b3: if (!vb) goto b11;
- v9 = 0UL;
-b12: if (!v9) goto b9;
+ v8 = 0UL;
+b12: if (!v8) goto b9;
return va;
-b9: v10 = (u)zalloc;
- v11 = *(u*)(vc + 0UL);
- v12 = 48UL;
- v13 = ((u(*)())v10)(v11, v12);
- vn = v13;
- *(u*)(vn + 8UL) = -1UL;
- *(u*)(vn + 16UL) = va;
- *(u*)(vn + 24UL) = vb;
- *(u*)(vn + 40UL) = *(u*)(va + 40UL) + *(u*)(vb + 40UL);
- v14 = (u)zalloc;
- v15 = *(u*)(vc + 0UL);
- v16 = 8UL * *(u*)(vn + 40UL);
- v17 = ((u(*)())v14)(v15, v16);
- *(u*)(vn + 32UL) = v17;
+b9: v9 = (u)zalloc;
+ v10 = *(u*)(vc + 0UL);
+ v11 = 72UL;
+ v12 = ((u(*)())v9)(v10, v11);
+ vn = v12;
+ *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 16UL) = -1UL;
+ *(u*)(vn + 24UL) = -1UL;
+ *(u*)(vn + 32UL) = va;
+ *(u*)(vn + 40UL) = vb;
+ *(u*)(vn + 56UL) = *(u*)(va + 56UL) + *(u*)(vb + 56UL);
+ v13 = (u)zalloc;
+ v14 = *(u*)(vc + 0UL);
+ v15 = 8UL * *(u*)(vn + 56UL);
+ v16 = ((u(*)())v13)(v14, v15);
+ *(u*)(vn + 48UL) = v16;
vi = 0UL;
vj = 0UL;
-b15: if (vi != *(u*)(va + 40UL)) goto b19;
-b20: if (vi != *(u*)(vb + 40UL)) goto b24;
- v18 = (u)zfree;
- v19 = *(u*)(vc + 0UL);
- v20 = *(u*)(va + 32UL);
- v21 = ((u(*)())v18)(v19, v20);
- v21;
- v22 = (u)zfree;
- v23 = *(u*)(vc + 0UL);
- v24 = *(u*)(vb + 32UL);
- v25 = ((u(*)())v22)(v23, v24);
- v25;
- return vret;
-b24: *(u*)(*(u*)(vn + 32UL) + vj * 8UL) = *(u*)(*(u*)(vb + 32UL) + vi * 8UL);
+b15: if (vi != *(u*)(va + 56UL)) goto b19;
+ vi = 0UL;
+b20: if (vi != *(u*)(vb + 56UL)) goto b24;
+ v17 = (u)zfree;
+ v18 = *(u*)(vc + 0UL);
+ v19 = *(u*)(va + 48UL);
+ v20 = ((u(*)())v17)(v18, v19);
+ v20;
+ v21 = (u)zfree;
+ v22 = *(u*)(vc + 0UL);
+ v23 = *(u*)(vb + 48UL);
+ v24 = ((u(*)())v21)(v22, v23);
+ v24;
+ return vn;
+b24: *(u*)(*(u*)(vn + 48UL) + vj * 8UL) = *(u*)(*(u*)(vb + 48UL) + vi * 8UL);
vj = vj + 1UL;
vi = vi + 1UL;
goto b20;
-b19: *(u*)(*(u*)(vn + 32UL) + vj * 8UL) = *(u*)(*(u*)(va + 32UL) + vi * 8UL);
+b19: *(u*)(*(u*)(vn + 48UL) + vj * 8UL) = *(u*)(*(u*)(va + 48UL) + vi * 8UL);
vj = vj + 1UL;
vi = vi + 1UL;
goto b15;
-b11: v9 = 1UL;
+b11: v8 = 1UL;
goto b12;
-b5: v8 = 1UL;
+b5: v7 = 1UL;
goto b6;
}
u znfa_any(u vc) {
- u v1 = 0;
+ u vn = 0;
u v2 = 0;
u v3 = 0;
u v4 = 0;
- v1 = (u)znfa_lit;
- v2 = vc;
- v3 = -1UL;
- v4 = ((u(*)())v1)(v2, v3);
- return v4;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ v2 = (u)zalloc;
+ v3 = *(u*)(vc + 0UL);
+ v4 = 72UL;
+ v5 = ((u(*)())v2)(v3, v4);
+ vn = v5;
+ *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 32UL) = 0UL;
+ *(u*)(vn + 16UL) = 0UL;
+ *(u*)(vn + 24UL) = 255UL;
+ v6 = (u)zalloc;
+ v7 = *(u*)(vc + 0UL);
+ v8 = 8UL;
+ v9 = ((u(*)())v6)(v7, v8);
+ *(u*)(vn + 48UL) = v9;
+ *(u*)(vn + 56UL) = 1UL;
+ *(u*)(*(u*)(vn + 48UL) + 0UL * 8UL) = vn;
+ return vn;
}
u znfa_concat(u vc, u va, u vb) {
u vi = 0;
@@ -24592,16 +25044,16 @@ b3: if (!vb) goto b11;
b12: if (!v5) goto b9;
return 0UL;
b9: vi = 0UL;
-b13: if (vi != *(u*)(va + 40UL)) goto b17;
+b13: if (vi != *(u*)(va + 56UL)) goto b17;
v6 = (u)zfree;
v7 = *(u*)(vc + 0UL);
- v8 = *(u*)(va + 32UL);
+ v8 = *(u*)(va + 48UL);
v9 = ((u(*)())v6)(v7, v8);
v9;
- *(u*)(va + 32UL) = *(u*)(vb + 32UL);
- *(u*)(va + 40UL) = *(u*)(vb + 40UL);
+ *(u*)(va + 48UL) = *(u*)(vb + 48UL);
+ *(u*)(va + 56UL) = *(u*)(vb + 56UL);
return va;
-b17: *(u*)(*(u*)(*(u*)(va + 32UL) + vi * 8UL) + 16UL) = vb;
+b17: *(u*)(*(u*)(*(u*)(va + 48UL) + vi * 8UL) + 32UL) = vb;
vi = vi + 1UL;
goto b13;
b11: v5 = 1UL;
@@ -24621,24 +25073,26 @@ u znfa_empty(u vc) {
u v9 = 0;
v2 = (u)zalloc;
v3 = *(u*)(vc + 0UL);
- v4 = 48UL;
+ v4 = 72UL;
v5 = ((u(*)())v2)(v3, v4);
vn = v5;
- *(u*)(vn + 8UL) = -1UL;
- *(u*)(vn + 16UL) = 0UL;
- *(u*)(vn + 24UL) = 0UL;
+ *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 16UL) = -1UL;
+ *(u*)(vn + 24UL) = -1UL;
+ *(u*)(vn + 32UL) = 0UL;
+ *(u*)(vn + 40UL) = 0UL;
v6 = (u)zalloc;
v7 = *(u*)(vc + 0UL);
v8 = 8UL;
v9 = ((u(*)())v6)(v7, v8);
- *(u*)(vn + 32UL) = v9;
- *(u*)(vn + 40UL) = 1UL;
- *(u*)(*(u*)(vn + 32UL) + 0UL * 8UL) = vn;
+ *(u*)(vn + 48UL) = v9;
+ *(u*)(vn + 56UL) = 1UL;
+ *(u*)(*(u*)(vn + 48UL) + 0UL * 8UL) = vn;
return vn;
}
-u znfa_lit(u vc, u vch) {
+u znfa_lit(u vc, u vstart, u vend) {
u vn = 0;
- u v3 = 0;
u v4 = 0;
u v5 = 0;
u v6 = 0;
@@ -24646,20 +25100,24 @@ u znfa_lit(u vc, u vch) {
u v8 = 0;
u v9 = 0;
u v10 = 0;
- v3 = (u)zalloc;
- v4 = *(u*)(vc + 0UL);
- v5 = 48UL;
- v6 = ((u(*)())v3)(v4, v5);
- vn = v6;
- *(u*)(vn + 16UL) = 0UL;
- *(u*)(vn + 8UL) = vch;
- v7 = (u)zalloc;
- v8 = *(u*)(vc + 0UL);
- v9 = 8UL;
- v10 = ((u(*)())v7)(v8, v9);
- *(u*)(vn + 32UL) = v10;
- *(u*)(vn + 40UL) = 1UL;
- *(u*)(*(u*)(vn + 32UL) + 0UL * 8UL) = vn;
+ u v11 = 0;
+ v4 = (u)zalloc;
+ v5 = *(u*)(vc + 0UL);
+ v6 = 72UL;
+ v7 = ((u(*)())v4)(v5, v6);
+ vn = v7;
+ *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 32UL) = 0UL;
+ *(u*)(vn + 16UL) = vstart;
+ *(u*)(vn + 24UL) = vend;
+ v8 = (u)zalloc;
+ v9 = *(u*)(vc + 0UL);
+ v10 = 8UL;
+ v11 = ((u(*)())v8)(v9, v10);
+ *(u*)(vn + 48UL) = v11;
+ *(u*)(vn + 56UL) = 1UL;
+ *(u*)(*(u*)(vn + 48UL) + 0UL * 8UL) = vn;
return vn;
}
u znfa_plus(u vc, u va) {
@@ -24684,19 +25142,22 @@ b6: if (!v3) goto b3;
return 0UL;
b3: v4 = (u)zalloc;
v5 = *(u*)(vc + 0UL);
- v6 = 48UL;
+ v6 = 72UL;
v7 = ((u(*)())v4)(v5, v6);
vb = v7;
- *(u*)(vb + 8UL) = -1UL;
- *(u*)(vb + 16UL) = 0UL;
- *(u*)(vb + 24UL) = va;
+ *(u*)(vb + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vb + 16UL) = -1UL;
+ *(u*)(vb + 24UL) = -1UL;
+ *(u*)(vb + 32UL) = 0UL;
+ *(u*)(vb + 40UL) = va;
v8 = (u)zalloc;
v9 = *(u*)(vc + 0UL);
v10 = 8UL;
v11 = ((u(*)())v8)(v9, v10);
- *(u*)(vb + 32UL) = v11;
- *(u*)(vb + 40UL) = 1UL;
- *(u*)(*(u*)(vb + 32UL) + 0UL * 8UL) = vb;
+ *(u*)(vb + 48UL) = v11;
+ *(u*)(vb + 56UL) = 1UL;
+ *(u*)(*(u*)(vb + 48UL) + 0UL * 8UL) = vb;
v12 = (u)znfa_concat;
v13 = vc;
v14 = va;
@@ -24734,27 +25195,30 @@ b6: if (!v4) goto b3;
return v7;
b3: v8 = (u)zalloc;
v9 = *(u*)(vc + 0UL);
- v10 = 48UL;
+ v10 = 72UL;
v11 = ((u(*)())v8)(v9, v10);
vb = v11;
- *(u*)(vb + 8UL) = -1UL;
- *(u*)(vb + 16UL) = 0UL;
- *(u*)(vb + 24UL) = va;
- *(u*)(vb + 40UL) = *(u*)(va + 40UL) + 1UL;
+ *(u*)(vb + 0UL) = *(u*)(vc + 112UL);
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vb + 16UL) = -1UL;
+ *(u*)(vb + 24UL) = -1UL;
+ *(u*)(vb + 32UL) = 0UL;
+ *(u*)(vb + 40UL) = va;
+ *(u*)(vb + 56UL) = *(u*)(va + 56UL) + 1UL;
v12 = (u)zalloc;
v13 = *(u*)(vc + 0UL);
- v14 = 8UL * *(u*)(vb + 40UL);
+ v14 = 8UL * *(u*)(vb + 56UL);
v15 = ((u(*)())v12)(v13, v14);
- *(u*)(vb + 32UL) = v15;
-b10: if (vi != *(u*)(va + 40UL)) goto b14;
- *(u*)(*(u*)(vb + 32UL) + vi * 8UL) = vb;
+ *(u*)(vb + 48UL) = v15;
+b10: if (vi != *(u*)(va + 56UL)) goto b14;
+ *(u*)(*(u*)(vb + 48UL) + vi * 8UL) = vb;
v16 = (u)zfree;
v17 = *(u*)(vc + 0UL);
- v18 = *(u*)(va + 32UL);
+ v18 = *(u*)(va + 48UL);
v19 = ((u(*)())v16)(v17, v18);
v19;
return vb;
-b14: *(u*)(*(u*)(vb + 32UL) + vi * 8UL) = *(u*)(*(u*)(va + 32UL) + vi * 8UL);
+b14: *(u*)(*(u*)(vb + 48UL) + vi * 8UL) = *(u*)(*(u*)(va + 48UL) + vi * 8UL);
vi = vi + 1UL;
goto b10;
b5: v4 = 1UL;
diff --git a/cc1.om b/cc1.om
@@ -31,6 +31,10 @@ struct compiler {
// Usage stack
used_top: *decl;
+
+ nfa_id: int;
+ dfa_id: int;
+ dfa_cache: *rbnode;
}
func cshow_context(c: *compiler) {
diff --git a/cc4.om b/cc4.om
@@ -66,7 +66,7 @@ lexer {
CHAR = "'" ("\\" . | [[^\\\x27]])* "'";
CHARSET = "[[" ([[^\]\\]]|"\\".)* "]]";
- SPACE = ([[ \r\n\t]] | "//" [[^\n]]*)*;
+ SPACE = ([[ \r\n\t]] | "//" [[^\n]]*)+;
}
//lalr {
diff --git a/lexer.om b/lexer.om
@@ -1,12 +1,15 @@
// https://swtch.com/~rsc/regexp/regexp1.html
struct nfa {
+ id: int;
tag: int;
- ch: int;
+ start: int;
+ end: int;
out: *nfa;
alt: *nfa;
tail: **nfa;
ntail: int;
+ mark: int;
}
func nfa_concat(c: *compiler, a: *nfa, b:* nfa): *nfa {
@@ -40,7 +43,6 @@ func nfa_concat(c: *compiler, a: *nfa, b:* nfa): *nfa {
}
func nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
- var ret: *nfa;
var n: *nfa;
var tail: **nfa;
var i: int;
@@ -55,7 +57,10 @@ func nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
}
n = alloc(c.a, sizeof(*n)) as *nfa;
- n.ch = -1;
+ n.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
+ n.start = -1;
+ n.end = -1;
n.out = a;
n.alt = b;
n.ntail = a.ntail + b.ntail;
@@ -74,6 +79,7 @@ func nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
i = i + 1;
}
+ i = 0;
loop {
if i == b.ntail {
break;
@@ -88,7 +94,7 @@ func nfa_alt(c: *compiler, a: *nfa, b: *nfa): *nfa {
free(c.a, a.tail as *byte);
free(c.a, b.tail as *byte);
- return ret;
+ return n;
}
func nfa_plus(c: *compiler, a: *nfa): *nfa {
@@ -97,7 +103,10 @@ func nfa_plus(c: *compiler, a: *nfa): *nfa {
return nil;
}
b = alloc(c.a, sizeof(*b)) as *nfa;
- b.ch = -1;
+ b.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
+ b.start = -1;
+ b.end = -1;
b.out = nil;
b.alt = a;
b.tail = alloc(c.a, sizeof(*b.tail)) as **nfa;
@@ -113,7 +122,10 @@ func nfa_qmark(c: *compiler, a: *nfa): *nfa {
return nfa_empty(c);
}
b = alloc(c.a, sizeof(*b)) as *nfa;
- b.ch = -1;
+ b.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
+ b.start = -1;
+ b.end = -1;
b.out = nil;
b.alt = a;
b.ntail = a.ntail + 1;
@@ -135,11 +147,14 @@ func nfa_star(c: *compiler, a: *nfa): *nfa {
return nfa_qmark(c, a);
}
-func nfa_lit(c: *compiler, ch: int): *nfa {
+func nfa_lit(c: *compiler, start: int, end: int): *nfa {
var n: *nfa;
n = alloc(c.a, sizeof(*n)) as *nfa;
+ n.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
n.out = nil;
- n.ch = ch;
+ n.start = start;
+ n.end = end;
n.tail = alloc(c.a, sizeof(*n.tail)) as **nfa;
n.ntail = 1;
n.tail[0] = n;
@@ -149,7 +164,10 @@ func nfa_lit(c: *compiler, ch: int): *nfa {
func nfa_empty(c: *compiler): *nfa {
var n: *nfa;
n = alloc(c.a, sizeof(*n)) as *nfa;
- n.ch = -1;
+ n.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
+ n.start = -1;
+ n.end = -1;
n.out = nil;
n.alt = nil;
n.tail = alloc(c.a, sizeof(*n.tail)) as **nfa;
@@ -159,7 +177,17 @@ func nfa_empty(c: *compiler): *nfa {
}
func nfa_any(c: *compiler): *nfa {
- return nfa_lit(c, -1);
+ var n: *nfa;
+ n = alloc(c.a, sizeof(*n)) as *nfa;
+ n.id = c.nfa_id;
+ c.nfa_id = c.nfa_id + 1;
+ n.out = nil;
+ n.start = 0;
+ n.end = 255;
+ n.tail = alloc(c.a, sizeof(*n.tail)) as **nfa;
+ n.ntail = 1;
+ n.tail[0] = n;
+ return n;
}
func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
@@ -177,6 +205,8 @@ func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
pn = pn.next;
}
+
+ lexer_explode(c, a);
}
func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa {
@@ -328,7 +358,7 @@ func lexer_compile_str(c: *compiler, pn: *peg_node): *nfa {
if !ok {
die("lexer_compile_str: invalid escape");
}
- b = nfa_lit(c, ch);
+ b = nfa_lit(c, ch, ch);
a = nfa_concat(c, a, b);
}
return a;
@@ -354,6 +384,20 @@ func lexer_compile_charset(c: *compiler, pn: *peg_node): *nfa {
i = 2;
len = pn.len - 2;
x = 1;
+
+ if s[i] == '^' as byte {
+ x = 0;
+ j = 0;
+ loop {
+ if j == 256 {
+ break;
+ }
+ scratch[j] = 1 as byte;
+ j = j + 1;
+ }
+ i = i + 1;
+ }
+
loop {
if i == len {
break;
@@ -402,14 +446,395 @@ func lexer_compile_charset(c: *compiler, pn: *peg_node): *nfa {
if j == 256 {
break;
}
- if scratch[j] {
- b = nfa_lit(c, j);
- a = nfa_alt(c, a, b);
+
+ if !scratch[j] {
+ j = j + 1;
+ continue;
}
- j = j + 1;
+
+ start = j;
+
+ loop {
+ if j == 256 || !scratch[j] {
+ break;
+ }
+ j = j + 1;
+ }
+
+ end = j - 1;
+
+ b = nfa_lit(c, start, end);
+ a = nfa_alt(c, a, b);
}
free(c.a, scratch);
return a;
}
+
+func nfa_show(n: *nfa) {
+ fputs(nil, "digraph {\n");
+ nfa_show2(n);
+ fputs(nil, "}\n");
+}
+
+func nfa_show2(n: *nfa) {
+ if !n {
+ return;
+ }
+
+ if n.mark {
+ return;
+ }
+
+ n.mark = 1;
+
+ fputd(nil, n.id);
+ if n.tag != 0 {
+ fputs(nil, "[shape=doublecircle]");
+ }
+ fputs(nil, ";\n");
+
+ if n.out {
+ fputd(nil, n.id);
+ fputs(nil, "->");
+ fputd(nil, n.out.id);
+ if n.out.start != -1 {
+ fputs(nil, "[label=\"'");
+ if n.out.start == '"' || n.out.start == '\\' {
+ fputs(nil, "\\");
+ }
+ if n.out.start != 0 {
+ fputc(nil, n.out.start);
+ } else {
+ fputs(nil, "\\0");
+ }
+ fputs(nil, "'\"]");
+ }
+ fputs(nil, ";\n");
+ nfa_show2(n.out);
+ }
+
+ if n.alt {
+ fputd(nil, n.id);
+ fputs(nil, "->");
+ fputd(nil, n.alt.id);
+ if n.alt.start != -1 {
+ fputs(nil, "[label=\"'");
+ if n.alt.start == '"' || n.alt.start == '\\' {
+ fputs(nil, "\\");
+ }
+ if n.alt.start != 0 {
+ fputc(nil, n.alt.start);
+ } else {
+ fputs(nil, "\\0");
+ }
+ fputs(nil, "'\"]");
+ }
+ fputs(nil, ";\n");
+ nfa_show2(n.alt);
+ }
+}
+
+struct dfa {
+ node: rbnode;
+ id: int;
+ link: **dfa;
+ key: **nfa;
+ keylen: int;
+ mark: int;
+ tag: int;
+}
+
+struct dfakey {
+ a: *alloc;
+ live: **nfa;
+ len: int;
+ cap: int;
+ tag: int;
+}
+
+func dfakey_add(k: *dfakey, n: *nfa) {
+ var tmp: **nfa;
+
+ if !n || n.mark {
+ return;
+ }
+ n.mark = 1;
+
+ if !k.tag || (n.tag && n.tag < k.tag) {
+ k.tag = n.tag;
+ }
+
+ if k.len == k.cap {
+ if k.cap == 0 {
+ k.cap = 16;
+ } else {
+ k.cap = k.cap * 2;
+ }
+ tmp = alloc(k.a, sizeof(*k.live) * k.cap) as **nfa;
+ memcpy(tmp as *byte, k.live as *byte, sizeof(*k.live) * k.len);
+ free(k.a, k.live as *byte);
+ k.live = tmp;
+ }
+
+ k.live[k.len] = n;
+ k.len = k.len + 1;
+
+ if n.out && n.out.start == -1 {
+ dfakey_add(k, n.out);
+ }
+ if n.alt && n.alt.start == -1 {
+ dfakey_add(k, n.alt);
+ }
+}
+
+func dfakey_clear(k: *dfakey) {
+ var i: int;
+
+ i = 0;
+ loop {
+ if i == k.len {
+ break;
+ }
+
+ k.live[i].mark = 0;
+
+ i = i + 1;
+ }
+
+ k.tag = 0;
+ k.len = 0;
+}
+
+func dfakey_sort(k: *dfakey) {
+ var i: int;
+ var j: int;
+ var tmp: *nfa;
+
+ i = 1;
+ loop {
+ if i >= k.len {
+ break;
+ }
+ j = i;
+ tmp = k.live[j];
+ loop {
+ if j == 0 || k.live[j - 1].id <= tmp.id {
+ break;
+ }
+ k.live[j] = k.live[j - 1];
+ j = j - 1;
+ }
+ k.live[j] = tmp;
+ i = i + 1;
+ }
+}
+
+func lexer_explode(c: *compiler, n: *nfa): *dfa {
+ var _k: dfakey;
+ var k: *dfakey;
+ var ret: *dfa;
+
+ k = &_k;
+ k.a = c.a;
+
+ if n.start != -1 {
+ die("expected epsilon");
+ }
+
+ dfakey_add(k, n);
+
+ ret = nfa2dfa(c, k);
+
+ free(k.a, k.live as *byte);
+
+ return ret;
+}
+
+func nfa2dfa(c: *compiler, k: *dfakey): *dfa {
+ var d: *dfa;
+ var p: *rbnode;
+ var link: **rbnode;
+ var dir: int;
+ var i: int;
+ var j: int;
+
+ if k.len == 0 {
+ return nil;
+ }
+
+ dfakey_sort(k);
+
+ p = nil;
+ link = &c.dfa_cache;
+ loop {
+ d = (*link) as *dfa;
+ if !d {
+ break;
+ }
+
+ dir = dfa_cmp(k, d);
+
+ if dir < 0 {
+ link = &d.node.left;
+ p = &d.node;
+ } else if dir > 0 {
+ link = &d.node.right;
+ p = &d.node;
+ } else {
+ return d;
+ }
+ }
+
+ d = alloc(c.a, sizeof(*d)) as *dfa;
+ d.link = alloc(c.a, sizeof(*d.link) * 256) as **dfa;
+ d.key = alloc(c.a, sizeof(*d.key) * k.len) as **nfa;
+ d.keylen = k.len;
+ d.id = c.dfa_id;
+ c.dfa_id = c.dfa_id + 1;
+
+ i = 0;
+ loop {
+ if i == k.len {
+ break;
+ }
+ d.key[i] = k.live[i];
+ i = i + 1;
+ }
+ d.tag = k.tag;
+
+ rb_link((&c.dfa_cache) as **rbnode, link, p, &d.node);
+
+ i = 0;
+ loop {
+ if i == 256 {
+ break;
+ }
+
+ dfakey_clear(k);
+ j = 0;
+ loop {
+ if j == d.keylen {
+ break;
+ }
+
+ if d.key[j].out && d.key[j].out.start <= i && d.key[j].out.end >= i {
+ dfakey_add(k, d.key[j].out);
+ }
+ if d.key[j].alt && d.key[j].alt.start <= i && d.key[j].alt.end >= i {
+ dfakey_add(k, d.key[j].alt);
+ }
+
+ j = j + 1;
+ }
+
+ d.link[i] = nfa2dfa(c, k);
+ i = i + 1;
+ }
+
+ return d;
+}
+
+func dfa_cmp(k: *dfakey, d: *dfa): int {
+ var i: int;
+
+ i = 0;
+ loop {
+ if i == k.len && i == d.keylen {
+ break;
+ }
+
+ if i == k.len {
+ return -1;
+ }
+
+ if i == d.keylen {
+ return 1;
+ }
+
+ if k.live[i].id > d.key[i].id {
+ return 1;
+ }
+
+ if k.live[i].id < d.key[i].id {
+ return -1;
+ }
+
+ i = i + 1;
+ }
+
+ return 0;
+}
+
+func dfa_show(d: *dfa) {
+ fputs(nil, "digraph {\n");
+ dfa_show2(d);
+ fputs(nil, "}\n");
+}
+
+func dfa_show2(d: *dfa) {
+ var i: int;
+ var start: int;
+ var end: int;
+ var o: *dfa;
+ if !d || d.mark {
+ return;
+ }
+ d.mark = 1;
+
+ fputd(nil, d.id);
+ if d.tag != 0 {
+ fputs(nil, "[shape=doublecircle]");
+ }
+ fputs(nil, ";\n");
+
+ i = 0;
+ loop {
+ if i == 256 {
+ break;
+ }
+
+ if !d.link[i] {
+ i = i + 1;
+ continue;
+ }
+
+ start = i;
+ o = d.link[i];
+
+ loop {
+ if i == 256 || d.link[i] != o {
+ break;
+ }
+ i = i + 1;
+ }
+
+ end = i - 1;
+
+ fputd(nil, d.id);
+ fputs(nil, "->");
+ fputd(nil, o.id);
+ fputs(nil, "[label=\"");
+ if start >= ' ' && start <= '~' && start != '"' && start != '\\' {
+ fputc(nil, start);
+ } else {
+ fputs(nil, "\\\\x");
+ fputh(nil, start >> 4);
+ fputh(nil, start & 15);
+ }
+ if start != end {
+ fputs(nil, "-");
+ if end >= ' ' && end <= '~' && end != '"' && end != '\\' {
+ fputc(nil, end);
+ } else {
+ fputs(nil, "\\\\x");
+ fputh(nil, end >> 4);
+ fputh(nil, end & 15);
+ }
+ }
+ fputs(nil, "\"];\n");
+
+ dfa_show2(o);
+ }
+}