commit da2f6fae3ef648e090af395192959965d30d55db
parent 096147389d9be724b7aed58cc12e0331a6614f07
Author: erai <erai@omiltem.net>
Date: Mon, 7 Apr 2025 21:24:26 +0000
hopcroft minimization
Diffstat:
M | alloc.om | | | 29 | +++++++++++++++++++++++++++++ |
M | cc0.c | | | 418 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
M | cc1.om | | | 1 | + |
M | lexer.om | | | 223 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
4 files changed, 651 insertions(+), 20 deletions(-)
diff --git a/alloc.om b/alloc.om
@@ -84,3 +84,32 @@ func alloc(c: *alloc, size: int): *byte {
func free(a: *alloc, p: *byte): void {
}
+
+func ensure_arr(a: *alloc, arr: **void, cap: *int, need: int, elen: int) {
+ var new_arr: *byte;
+ var new_cap: int;
+
+ need = need * elen;
+
+ new_cap = *cap;
+ if need < new_cap {
+ return;
+ }
+
+ loop {
+ if need <= new_cap {
+ break;
+ }
+ new_cap = new_cap * 2 + 16;
+ }
+
+ new_arr = alloc(a, new_cap);
+
+ if *cap {
+ memcpy(new_arr, (*arr) as *byte, *cap);
+ free(a, (*arr) as *byte);
+ }
+
+ *arr = new_arr as *void;
+ *cap = new_cap;
+}
diff --git a/cc0.c b/cc0.c
@@ -49,9 +49,14 @@ u zdefstruct();
u zdefun();
u zdefunion();
u zdfa_cmp();
+u zdfa_minimize();
u zdfakey_add();
u zdfakey_clear();
u zdfakey_sort();
+u zdfamin_extract();
+u zdfamin_refine();
+u zdfamin_setup();
+u zdfamin_split();
u zdie();
u zemit();
u zemit_align();
@@ -63,6 +68,7 @@ u zemit_kstart();
u zemit_sections();
u zemit_ssr();
u zemit_strtab_str();
+u zensure_arr();
u zenter();
u zexit();
u zexpr_to_ir();
@@ -2839,7 +2845,7 @@ u zcomp_setup(u va, u verr) {
u v13 = 0;
v3 = (u)zalloc;
v4 = va;
- v5 = 136UL;
+ v5 = 144UL;
v6 = ((u(*)())v3)(v4, v5);
vc = v6;
*(u*)(vc + 0UL) = va;
@@ -4047,6 +4053,37 @@ b21: vi = vi + 1UL;
b7: v3 = 0UL;
goto b9;
}
+u zdfa_minimize(u vc, u vd) {
+ u v_ctx[5] = {0};
+ u vctx = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ vctx = (u)v_ctx;
+ *(u*)(vctx + 0UL) = *(u*)(vc + 0UL);
+ v4 = (u)zdfamin_setup;
+ v5 = vctx;
+ v6 = vd;
+ v7 = ((u(*)())v4)(v5, v6);
+ v7;
+ v8 = (u)zdfamin_refine;
+ v9 = vctx;
+ v10 = ((u(*)())v8)(v9);
+ v10;
+ v11 = (u)zdfamin_extract;
+ v12 = vctx;
+ v13 = vd;
+ v14 = ((u(*)())v11)(v12, v13);
+ return v14;
+}
u zdfakey_add(u vk, u vn) {
u vtmp = 0;
u v3 = 0;
@@ -4197,6 +4234,296 @@ b12: if ((s)*(u*)(*(u*)(*(u*)(vk + 8UL) + (vj - 1UL) * 8UL) + 0UL) > (s)*(u*)(vt
b15: v4 = 0UL;
goto b14;
}
+u zdfamin_extract(u vctx, u vd) {
+ u vs = 0;
+ u vg = 0;
+ u vch = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ if (!vd) goto b5;
+ v5 = 0UL;
+b6: if (!v5) goto b3;
+ return 0UL;
+b3: vs = *(u*)(*(u*)(vctx + 8UL) + (*(u*)(vd + 64UL) - 1UL) * 8UL);
+ if (!*(u*)(vs + 40UL)) goto b9;
+ return *(u*)(vs + 40UL);
+b9: v6 = (u)zalloc;
+ v7 = *(u*)(vctx + 0UL);
+ v8 = 80UL;
+ v9 = ((u(*)())v6)(v7, v8);
+ vg = v9;
+ *(u*)(vg + 32UL) = *(u*)(vd + 64UL) - 1UL;
+ *(u*)(vg + 72UL) = *(u*)(vd + 72UL);
+ v10 = (u)zalloc;
+ v11 = *(u*)(vctx + 0UL);
+ v12 = 8UL * 256UL;
+ v13 = ((u(*)())v10)(v11, v12);
+ *(u*)(vg + 40UL) = v13;
+ *(u*)(vs + 40UL) = vg;
+ vch = 0UL;
+b12: if (vch != 256UL) goto b16;
+ return vg;
+b16: if (*(u*)(*(u*)(vd + 40UL) + vch * 8UL) == 0UL) goto b19;
+ v14 = (u)zdfamin_extract;
+ v15 = vctx;
+ v16 = *(u*)(*(u*)(vd + 40UL) + vch * 8UL);
+ v17 = ((u(*)())v14)(v15, v16);
+ *(u*)(*(u*)(vg + 40UL) + vch * 8UL) = v17;
+b17: vch = vch + 1UL;
+ goto b12;
+b19: goto b17;
+b5: v5 = 1UL;
+ goto b6;
+}
+u zdfamin_refine(u vctx) {
+ u vs = 0;
+ u vi = 0;
+ u vp = 0;
+ u vch = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+b1: vs = *(u*)(vctx + 32UL);
+ if (!vs) goto b7;
+ v5 = 0UL;
+b8: if (!v5) goto b5;
+ return 0UL;
+b5: *(u*)(vctx + 32UL) = *(u*)(vs + 0UL);
+ *(u*)(vs + 8UL) = 0UL;
+ vi = 0UL;
+b9: if (vi != *(u*)(vctx + 16UL)) goto b13;
+ goto b1;
+b13: vp = *(u*)(*(u*)(vctx + 8UL) + vi * 8UL);
+ vch = 0UL;
+b14: if (vch != 256UL) goto b18;
+ vi = vi + 1UL;
+ goto b9;
+b18: v6 = (u)zdfamin_split;
+ v7 = vctx;
+ v8 = vp;
+ v9 = vs;
+ v10 = vch;
+ v11 = ((u(*)())v6)(v7, v8, v9, v10);
+ v11;
+ vch = vch + 1UL;
+ goto b14;
+b7: v5 = 1UL;
+ goto b8;
+}
+u zdfamin_setup(u vctx, u vd) {
+ u vi = 0;
+ u vs = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ if (!vd) goto b9;
+ v5 = 0UL;
+b10: if (!v5) goto b5;
+ v4 = 1UL;
+b7: if (!v4) goto b3;
+ return 0UL;
+b3: *(u*)(vd + 64UL) = *(u*)(vd + 72UL) + 1UL;
+ v6 = (u)zensure_arr;
+ v7 = *(u*)(vctx + 0UL);
+ v8 = vctx + 8UL;
+ v9 = vctx + 24UL;
+ v10 = *(u*)(vd + 72UL) + 1UL;
+ v11 = 8UL;
+ v12 = ((u(*)())v6)(v7, v8, v9, v10, v11);
+ v12;
+ vs = *(u*)(*(u*)(vctx + 8UL) + *(u*)(vd + 72UL) * 8UL);
+ if (!vs) goto b17;
+ v13 = 0UL;
+b18: if (!v13) goto b15;
+ v14 = (u)zalloc;
+ v15 = *(u*)(vctx + 0UL);
+ v16 = 48UL;
+ v17 = ((u(*)())v14)(v15, v16);
+ vs = v17;
+ *(u*)(vs + 0UL) = *(u*)(vctx + 32UL);
+ *(u*)(vctx + 32UL) = vs;
+ *(u*)(vs + 8UL) = 1UL;
+ *(u*)(*(u*)(vctx + 8UL) + *(u*)(vd + 72UL) * 8UL) = vs;
+ if ((s)*(u*)(vd + 72UL) < (s)*(u*)(vctx + 16UL)) goto b22;
+ *(u*)(vctx + 16UL) = *(u*)(vd + 72UL) + 1UL;
+b20:b13: v18 = (u)zensure_arr;
+ v19 = *(u*)(vctx + 0UL);
+ v20 = vs + 16UL;
+ v21 = vs + 32UL;
+ v22 = *(u*)(vs + 24UL) + 1UL;
+ v23 = 8UL;
+ v24 = ((u(*)())v18)(v19, v20, v21, v22, v23);
+ v24;
+ *(u*)(*(u*)(vs + 16UL) + *(u*)(vs + 24UL) * 8UL) = vd;
+ *(u*)(vs + 24UL) = *(u*)(vs + 24UL) + 1UL;
+ vi = 0UL;
+b24: if (vi != 256UL) goto b28;
+ return 0UL;
+b28: v25 = (u)zdfamin_setup;
+ v26 = vctx;
+ v27 = *(u*)(*(u*)(vd + 40UL) + vi * 8UL);
+ v28 = ((u(*)())v25)(v26, v27);
+ v28;
+ vi = vi + 1UL;
+ goto b24;
+b22: goto b20;
+b15: goto b13;
+b17: v13 = 1UL;
+ goto b18;
+b5: if (!*(u*)(vd + 64UL)) goto b11;
+ v4 = 1UL;
+ goto b7;
+b11: v4 = 0UL;
+ goto b7;
+b9: v5 = 1UL;
+ goto b10;
+}
+u zdfamin_split(u vctx, u vp, u vs, u vch) {
+ u vi = 0;
+ u vk = 0;
+ u vm = 0;
+ u vd = 0;
+ u ve = 0;
+ u vnew_ind = 0;
+ u vt = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ vi = 0UL;
+ vk = 0UL;
+b1: if (vi != *(u*)(vp + 24UL)) goto b5;
+ if (vk != 0UL) goto b17;
+ v12 = 1UL;
+b19: if (!v12) goto b15;
+ return 0UL;
+b15: v13 = (u)zalloc;
+ v14 = *(u*)(vctx + 0UL);
+ v15 = 8UL * (*(u*)(vp + 24UL) - vk);
+ v16 = ((u(*)())v13)(v14, v15);
+ vnew_ind = v16;
+ vi = 0UL;
+ vk = 0UL;
+ vm = 0UL;
+b22: if (vi != *(u*)(vp + 24UL)) goto b26;
+ *(u*)(vp + 24UL) = vk;
+ v18 = (u)zensure_arr;
+ v19 = *(u*)(vctx + 0UL);
+ v20 = vctx + 8UL;
+ v21 = vctx + 24UL;
+ v22 = *(u*)(vctx + 16UL) + 1UL;
+ v23 = 8UL;
+ v24 = ((u(*)())v18)(v19, v20, v21, v22, v23);
+ v24;
+ v25 = (u)zalloc;
+ v26 = *(u*)(vctx + 0UL);
+ v27 = 48UL;
+ v28 = ((u(*)())v25)(v26, v27);
+ vt = v28;
+ *(u*)(vt + 0UL) = *(u*)(vctx + 32UL);
+ *(u*)(vt + 16UL) = vnew_ind;
+ *(u*)(vt + 24UL) = vm;
+ *(u*)(vt + 32UL) = vm;
+ *(u*)(vctx + 32UL) = vt;
+ *(u*)(vt + 8UL) = 1UL;
+ *(u*)(*(u*)(vctx + 8UL) + *(u*)(vctx + 16UL) * 8UL) = vt;
+ *(u*)(vctx + 16UL) = *(u*)(vctx + 16UL) + 1UL;
+ if (!*(u*)(vp + 8UL)) goto b40;
+ v29 = 0UL;
+b41: if (!v29) goto b38;
+ *(u*)(vp + 0UL) = *(u*)(vctx + 32UL);
+ *(u*)(vctx + 32UL) = vp;
+ *(u*)(vp + 8UL) = 1UL;
+b36: return 0UL;
+b38: goto b36;
+b40: v29 = 1UL;
+ goto b41;
+b26: vd = *(u*)(*(u*)(vp + 16UL) + vi * 8UL);
+ ve = *(u*)(*(u*)(vd + 40UL) + vch * 8UL);
+ if (!ve) goto b31;
+ if (*(u*)(*(u*)(vctx + 8UL) + (*(u*)(ve + 64UL) - 1UL) * 8UL) != vs) goto b31;
+ v17 = 1UL;
+b33: if (!v17) goto b29;
+ *(u*)(*(u*)(vp + 16UL) + vk * 8UL) = vd;
+ vk = vk + 1UL;
+b27: vi = vi + 1UL;
+ goto b22;
+b29: *(u*)(vnew_ind + vm * 8UL) = vd;
+ *(u*)(vd + 64UL) = *(u*)(vctx + 16UL) + 1UL;
+ vm = vm + 1UL;
+ goto b27;
+b31: v17 = 0UL;
+ goto b33;
+b17: if (vk != *(u*)(vp + 24UL)) goto b20;
+ v12 = 1UL;
+ goto b19;
+b20: v12 = 0UL;
+ goto b19;
+b5: vd = *(u*)(*(u*)(vp + 16UL) + vi * 8UL);
+ ve = *(u*)(*(u*)(vd + 40UL) + vch * 8UL);
+ if (!ve) goto b10;
+ if (*(u*)(*(u*)(vctx + 8UL) + (*(u*)(ve + 64UL) - 1UL) * 8UL) != vs) goto b10;
+ v11 = 1UL;
+b12: if (!v11) goto b8;
+ vk = vk + 1UL;
+b6: vi = vi + 1UL;
+ goto b1;
+b8: goto b6;
+b10: v11 = 0UL;
+ goto b12;
+}
u zdie(u vmsg) {
u vlen = 0;
u v2 = 0;
@@ -14136,6 +14463,51 @@ b6: vi = vi + 1UL;
b8: v7 = 1UL;
goto b9;
}
+u zensure_arr(u va, u varr, u vcap, u vneed, u velen) {
+ u vnew_arr = 0;
+ u vnew_cap = 0;
+ u v7 = 0;
+ u v8 = 0;
+ u v9 = 0;
+ u v10 = 0;
+ u v11 = 0;
+ u v12 = 0;
+ u v13 = 0;
+ u v14 = 0;
+ u v15 = 0;
+ u v16 = 0;
+ u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ vneed = vneed * velen;
+ vnew_cap = *(u*)vcap;
+ if ((s)vneed >= (s)vnew_cap) goto b3;
+ return 0UL;
+b3:b4: if ((s)vneed > (s)vnew_cap) goto b8;
+ v7 = (u)zalloc;
+ v8 = va;
+ v9 = vnew_cap;
+ v10 = ((u(*)())v7)(v8, v9);
+ vnew_arr = v10;
+ if (!*(u*)vcap) goto b12;
+ v11 = (u)zmemcpy;
+ v12 = vnew_arr;
+ v13 = *(u*)varr;
+ v14 = *(u*)vcap;
+ v15 = ((u(*)())v11)(v12, v13, v14);
+ v15;
+ v16 = (u)zfree;
+ v17 = va;
+ v18 = *(u*)varr;
+ v19 = ((u(*)())v16)(v17, v18);
+ v19;
+b10: *(u*)varr = vnew_arr;
+ *(u*)vcap = vnew_cap;
+ return 0UL;
+b12: goto b10;
+b8: vnew_cap = vnew_cap * 2UL + 16UL;
+ goto b4;
+}
u zenter(u vc, u vtag) {
u v2 = 0;
u v3 = 0;
@@ -23312,7 +23684,8 @@ u zlexer_compile_rule(u vc, u vpn) {
v11 = vc;
v12 = ((u(*)())v10)(v11);
vb = v12;
- *(u*)(vb + 8UL) = 1UL;
+ *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vb + 8UL) = *(u*)(vc + 112UL);
v13 = (u)znfa_concat;
v14 = vc;
v15 = va;
@@ -23485,6 +23858,10 @@ u zlexer_explode(u vc, u vn) {
u v17 = 0;
u v18 = 0;
u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
vk = (u)v_k;
*(u*)(vk + 0UL) = *(u*)(vc + 0UL);
if (*(u*)(vn + 16UL) == -1UL) goto b3;
@@ -23507,6 +23884,11 @@ b1: v8 = (u)zdfakey_add;
v18 = *(u*)(vk + 8UL);
v19 = ((u(*)())v16)(v17, v18);
v19;
+ v20 = (u)zdfa_minimize;
+ v21 = vc;
+ v22 = vret;
+ v23 = ((u(*)())v20)(v21, v22);
+ vret = v23;
return vret;
b3: goto b1;
}
@@ -24816,7 +25198,7 @@ b3: v8 = (u)zdfakey_sort;
v10 = ((u(*)())v8)(v9);
v10;
vp = 0UL;
- vlink = vc + 128UL;
+ vlink = vc + 136UL;
b5: vd = *(u*)vlink;
if (!vd) goto b11;
v11 = 0UL;
@@ -24837,13 +25219,13 @@ b12: if (!v11) goto b9;
v27 = ((u(*)())v24)(v25, v26);
*(u*)(vd + 48UL) = v27;
*(u*)(vd + 56UL) = *(u*)(vk + 16UL);
- *(u*)(vd + 32UL) = *(u*)(vc + 120UL);
- *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
+ *(u*)(vd + 32UL) = *(u*)(vc + 128UL);
+ *(u*)(vc + 128UL) = *(u*)(vc + 128UL) + 1UL;
vi = 0UL;
b22: if (vi != *(u*)(vk + 16UL)) goto b26;
*(u*)(vd + 72UL) = *(u*)(vk + 32UL);
v28 = (u)zrb_link;
- v29 = vc + 128UL;
+ v29 = vc + 136UL;
v30 = vlink;
v31 = vp;
v32 = vd + 0UL;
@@ -24957,8 +25339,8 @@ b9: v9 = (u)zalloc;
v11 = 72UL;
v12 = ((u(*)())v9)(v10, v11);
vn = v12;
- *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vn + 16UL) = -1UL;
*(u*)(vn + 24UL) = -1UL;
*(u*)(vn + 32UL) = va;
@@ -25013,8 +25395,8 @@ u znfa_any(u vc) {
v4 = 72UL;
v5 = ((u(*)())v2)(v3, v4);
vn = v5;
- *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vn + 32UL) = 0UL;
*(u*)(vn + 16UL) = 0UL;
*(u*)(vn + 24UL) = 255UL;
@@ -25076,8 +25458,8 @@ u znfa_empty(u vc) {
v4 = 72UL;
v5 = ((u(*)())v2)(v3, v4);
vn = v5;
- *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vn + 16UL) = -1UL;
*(u*)(vn + 24UL) = -1UL;
*(u*)(vn + 32UL) = 0UL;
@@ -25106,8 +25488,8 @@ u znfa_lit(u vc, u vstart, u vend) {
v6 = 72UL;
v7 = ((u(*)())v4)(v5, v6);
vn = v7;
- *(u*)(vn + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vn + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vn + 32UL) = 0UL;
*(u*)(vn + 16UL) = vstart;
*(u*)(vn + 24UL) = vend;
@@ -25145,8 +25527,8 @@ b3: v4 = (u)zalloc;
v6 = 72UL;
v7 = ((u(*)())v4)(v5, v6);
vb = v7;
- *(u*)(vb + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vb + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vb + 16UL) = -1UL;
*(u*)(vb + 24UL) = -1UL;
*(u*)(vb + 32UL) = 0UL;
@@ -25198,8 +25580,8 @@ b3: v8 = (u)zalloc;
v10 = 72UL;
v11 = ((u(*)())v8)(v9, v10);
vb = v11;
- *(u*)(vb + 0UL) = *(u*)(vc + 112UL);
- *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
+ *(u*)(vb + 0UL) = *(u*)(vc + 120UL);
+ *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL;
*(u*)(vb + 16UL) = -1UL;
*(u*)(vb + 24UL) = -1UL;
*(u*)(vb + 32UL) = 0UL;
diff --git a/cc1.om b/cc1.om
@@ -32,6 +32,7 @@ struct compiler {
// Usage stack
used_top: *decl;
+ lex_id: int;
nfa_id: int;
dfa_id: int;
dfa_cache: *rbnode;
diff --git a/lexer.om b/lexer.om
@@ -220,7 +220,8 @@ func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa {
a = lexer_compile_pattern(c, pat);
b = nfa_empty(c);
- b.tag = 1;
+ c.lex_id = c.lex_id + 1;
+ b.tag = c.lex_id;
return nfa_concat(c, a, b);
}
@@ -649,6 +650,8 @@ func lexer_explode(c: *compiler, n: *nfa): *dfa {
free(k.a, k.live as *byte);
+ ret = dfa_minimize(c, ret);
+
return ret;
}
@@ -785,7 +788,11 @@ func dfa_show2(d: *dfa) {
fputd(nil, d.id);
if d.tag != 0 {
- fputs(nil, "[shape=doublecircle]");
+ fputs(nil, "[label=\"");
+ fputd(nil, d.tag);
+ fputs(nil, "\" shape=doublecircle]");
+ } else {
+ fputs(nil, "[label=\"\"]");
}
fputs(nil, ";\n");
@@ -838,3 +845,215 @@ func dfa_show2(d: *dfa) {
dfa_show2(o);
}
}
+
+struct dfamin {
+ a: *alloc;
+ sets: **dfaset;
+ nsets: int;
+ capsets: int;
+ queue: *dfaset;
+}
+
+struct dfaset {
+ next: *dfaset;
+ inqueue: int;
+ ind: **dfa;
+ ind_len: int;
+ ind_cap: int;
+ node: *dfa;
+}
+
+func dfa_minimize(c: *compiler, d: *dfa): *dfa {
+ var _ctx: dfamin;
+ var ctx: *dfamin;
+ ctx = &_ctx;
+ ctx.a = c.a;
+ dfamin_setup(ctx, d);
+ dfamin_refine(ctx);
+ return dfamin_extract(ctx, d);
+}
+
+func dfamin_setup(ctx: *dfamin, d: *dfa) {
+ var i: int;
+ var s: *dfaset;
+
+ if !d || d.mark {
+ return;
+ }
+ d.mark = d.tag + 1;
+
+ // Split states classes based on the accepting tag.
+ ensure_arr(ctx.a, (&ctx.sets) as **void, &ctx.capsets, d.tag + 1, sizeof(*ctx.sets));
+ s = ctx.sets[d.tag];
+ if !s {
+ s = alloc(ctx.a, sizeof(*s)) as *dfaset;
+ s.next = ctx.queue;
+ ctx.queue = s;
+ s.inqueue = 1;
+ ctx.sets[d.tag] = s;
+ if d.tag >= ctx.nsets {
+ ctx.nsets = d.tag + 1;
+ }
+ }
+
+ ensure_arr(ctx.a, (&s.ind) as **void, &s.ind_cap, s.ind_len + 1, sizeof(*s.ind));
+ s.ind[s.ind_len] = d;
+ s.ind_len = s.ind_len + 1;
+
+ i = 0;
+ loop {
+ if i == 256 {
+ break;
+ }
+ dfamin_setup(ctx, d.link[i]);
+ i = i + 1;
+ }
+}
+
+func dfamin_refine(ctx: *dfamin) {
+ var s: *dfaset;
+ var i: int;
+ var p: *dfaset;
+ var ch: int;
+ loop {
+ // Remove a set s from the queue
+ s = ctx.queue;
+ if !s {
+ break;
+ }
+ ctx.queue = s.next;
+ s.inqueue = 0;
+
+ i = 0;
+ loop {
+ // For each set p in the partition
+ if i == ctx.nsets {
+ break;
+ }
+ p = ctx.sets[i];
+
+ // For each letter of the alphabet
+ ch = 0;
+ loop {
+ if ch == 256 {
+ break;
+ }
+ dfamin_split(ctx, p, s, ch);
+ ch = ch + 1;
+ }
+
+ i = i + 1;
+ }
+ }
+}
+
+func dfamin_split(ctx: *dfamin, p: *dfaset, s: *dfaset, ch: int) {
+ var i: int;
+ var k: int;
+ var m: int;
+ var d: *dfa;
+ var e: *dfa;
+ var new_ind: **dfa;
+ var t: *dfaset;
+
+ // Count k the states in p that lead to s on ch
+ i = 0;
+ k = 0;
+ loop {
+ if i == p.ind_len {
+ break;
+ }
+ d = p.ind[i];
+
+ e = d.link[ch];
+ if e && ctx.sets[e.mark - 1] == s {
+ k = k + 1;
+ }
+
+ i = i + 1;
+ }
+
+ if k == 0 || k == p.ind_len {
+ return;
+ }
+
+ new_ind = alloc(ctx.a, sizeof(*new_ind) * (p.ind_len - k)) as **dfa;
+
+ // Split nodes into two sets
+ i = 0;
+ k = 0;
+ m = 0;
+ loop {
+ if i == p.ind_len {
+ break;
+ }
+ d = p.ind[i];
+
+ e = d.link[ch];
+ if e && ctx.sets[e.mark - 1] == s {
+ p.ind[k] = d;
+ k = k + 1;
+ } else {
+ new_ind[m] = d;
+ d.mark = ctx.nsets + 1;
+ m = m + 1;
+ }
+
+ i = i + 1;
+ }
+ p.ind_len = k;
+
+ // Create a new set from the nodes that don't lead to p
+ ensure_arr(ctx.a, (&ctx.sets) as **void, &ctx.capsets, ctx.nsets + 1, sizeof(*ctx.sets));
+ t = alloc(ctx.a, sizeof(*t)) as *dfaset;
+ t.next = ctx.queue;
+ t.ind = new_ind;
+ t.ind_len = m;
+ t.ind_cap = m;
+ ctx.queue = t;
+ t.inqueue = 1;
+ ctx.sets[ctx.nsets] = t;
+ ctx.nsets = ctx.nsets + 1;
+
+ // Queue up p
+ if !p.inqueue {
+ p.next = ctx.queue;
+ ctx.queue = p;
+ p.inqueue = 1;
+ }
+}
+
+func dfamin_extract(ctx: *dfamin, d: *dfa): *dfa {
+ var s: *dfaset;
+ var g: *dfa;
+ var ch: int;
+
+ if !d {
+ return nil;
+ }
+
+ s = ctx.sets[d.mark - 1];
+ if s.node {
+ return s.node;
+ }
+
+ g = alloc(ctx.a, sizeof(*g)) as *dfa;
+ g.id = d.mark - 1;
+ g.tag = d.tag;
+ g.link = alloc(ctx.a, sizeof(*g.link) * 256) as **dfa;
+
+ s.node = g;
+
+ ch = 0;
+ loop {
+ if ch == 256 {
+ break;
+ }
+ if d.link[ch] != nil {
+ g.link[ch] = dfamin_extract(ctx, d.link[ch]);
+ }
+ ch = ch + 1;
+ }
+
+ return g;
+}