os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit da2f6fae3ef648e090af395192959965d30d55db
parent 096147389d9be724b7aed58cc12e0331a6614f07
Author: erai <erai@omiltem.net>
Date:   Mon,  7 Apr 2025 21:24:26 +0000

hopcroft minimization

Diffstat:
Malloc.om | 29+++++++++++++++++++++++++++++
Mcc0.c | 418+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Mcc1.om | 1+
Mlexer.om | 223++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 651 insertions(+), 20 deletions(-)

diff --git a/alloc.om b/alloc.om @@ -84,3 +84,32 @@ func alloc(c: *alloc, size: int): *byte { func free(a: *alloc, p: *byte): void { } + +func ensure_arr(a: *alloc, arr: **void, cap: *int, need: int, elen: int) { + var new_arr: *byte; + var new_cap: int; + + need = need * elen; + + new_cap = *cap; + if need < new_cap { + return; + } + + loop { + if need <= new_cap { + break; + } + new_cap = new_cap * 2 + 16; + } + + new_arr = alloc(a, new_cap); + + if *cap { + memcpy(new_arr, (*arr) as *byte, *cap); + free(a, (*arr) as *byte); + } + + *arr = new_arr as *void; + *cap = new_cap; +} diff --git a/cc0.c b/cc0.c @@ -49,9 +49,14 @@ u zdefstruct(); u zdefun(); u zdefunion(); u zdfa_cmp(); +u zdfa_minimize(); u zdfakey_add(); u zdfakey_clear(); u zdfakey_sort(); +u zdfamin_extract(); +u zdfamin_refine(); +u zdfamin_setup(); +u zdfamin_split(); u zdie(); u zemit(); u zemit_align(); @@ -63,6 +68,7 @@ u zemit_kstart(); u zemit_sections(); u zemit_ssr(); u zemit_strtab_str(); +u zensure_arr(); u zenter(); u zexit(); u zexpr_to_ir(); @@ -2839,7 +2845,7 @@ u zcomp_setup(u va, u verr) { u v13 = 0; v3 = (u)zalloc; v4 = va; - v5 = 136UL; + v5 = 144UL; v6 = ((u(*)())v3)(v4, v5); vc = v6; *(u*)(vc + 0UL) = va; @@ -4047,6 +4053,37 @@ b21: vi = vi + 1UL; b7: v3 = 0UL; goto b9; } +u zdfa_minimize(u vc, u vd) { + u v_ctx[5] = {0}; + u vctx = 0; + u v4 = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + vctx = (u)v_ctx; + *(u*)(vctx + 0UL) = *(u*)(vc + 0UL); + v4 = (u)zdfamin_setup; + v5 = vctx; + v6 = vd; + v7 = ((u(*)())v4)(v5, v6); + v7; + v8 = (u)zdfamin_refine; + v9 = vctx; + v10 = ((u(*)())v8)(v9); + v10; + v11 = (u)zdfamin_extract; + v12 = vctx; + v13 = vd; + v14 = ((u(*)())v11)(v12, v13); + return v14; +} u zdfakey_add(u vk, u vn) { u vtmp = 0; u v3 = 0; @@ -4197,6 +4234,296 @@ b12: if ((s)*(u*)(*(u*)(*(u*)(vk + 8UL) + (vj - 1UL) * 8UL) + 0UL) > (s)*(u*)(vt b15: v4 = 0UL; goto b14; } +u zdfamin_extract(u vctx, u vd) { + u vs = 0; + u vg = 0; + u vch = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + if (!vd) goto b5; + v5 = 0UL; +b6: if (!v5) goto b3; + return 0UL; +b3: vs = *(u*)(*(u*)(vctx + 8UL) + (*(u*)(vd + 64UL) - 1UL) * 8UL); + if (!*(u*)(vs + 40UL)) goto b9; + return *(u*)(vs + 40UL); +b9: v6 = (u)zalloc; + v7 = *(u*)(vctx + 0UL); + v8 = 80UL; + v9 = ((u(*)())v6)(v7, v8); + vg = v9; + *(u*)(vg + 32UL) = *(u*)(vd + 64UL) - 1UL; + *(u*)(vg + 72UL) = *(u*)(vd + 72UL); + v10 = (u)zalloc; + v11 = *(u*)(vctx + 0UL); + v12 = 8UL * 256UL; + v13 = ((u(*)())v10)(v11, v12); + *(u*)(vg + 40UL) = v13; + *(u*)(vs + 40UL) = vg; + vch = 0UL; +b12: if (vch != 256UL) goto b16; + return vg; +b16: if (*(u*)(*(u*)(vd + 40UL) + vch * 8UL) == 0UL) goto b19; + v14 = (u)zdfamin_extract; + v15 = vctx; + v16 = *(u*)(*(u*)(vd + 40UL) + vch * 8UL); + v17 = ((u(*)())v14)(v15, v16); + *(u*)(*(u*)(vg + 40UL) + vch * 8UL) = v17; +b17: vch = vch + 1UL; + goto b12; +b19: goto b17; +b5: v5 = 1UL; + goto b6; +} +u zdfamin_refine(u vctx) { + u vs = 0; + u vi = 0; + u vp = 0; + u vch = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; +b1: vs = *(u*)(vctx + 32UL); + if (!vs) goto b7; + v5 = 0UL; +b8: if (!v5) goto b5; + return 0UL; +b5: *(u*)(vctx + 32UL) = *(u*)(vs + 0UL); + *(u*)(vs + 8UL) = 0UL; + vi = 0UL; +b9: if (vi != *(u*)(vctx + 16UL)) goto b13; + goto b1; +b13: vp = *(u*)(*(u*)(vctx + 8UL) + vi * 8UL); + vch = 0UL; +b14: if (vch != 256UL) goto b18; + vi = vi + 1UL; + goto b9; +b18: v6 = (u)zdfamin_split; + v7 = vctx; + v8 = vp; + v9 = vs; + v10 = vch; + v11 = ((u(*)())v6)(v7, v8, v9, v10); + v11; + vch = vch + 1UL; + goto b14; +b7: v5 = 1UL; + goto b8; +} +u zdfamin_setup(u vctx, u vd) { + u vi = 0; + u vs = 0; + u v4 = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + u v18 = 0; + u v19 = 0; + u v20 = 0; + u v21 = 0; + u v22 = 0; + u v23 = 0; + u v24 = 0; + u v25 = 0; + u v26 = 0; + u v27 = 0; + u v28 = 0; + if (!vd) goto b9; + v5 = 0UL; +b10: if (!v5) goto b5; + v4 = 1UL; +b7: if (!v4) goto b3; + return 0UL; +b3: *(u*)(vd + 64UL) = *(u*)(vd + 72UL) + 1UL; + v6 = (u)zensure_arr; + v7 = *(u*)(vctx + 0UL); + v8 = vctx + 8UL; + v9 = vctx + 24UL; + v10 = *(u*)(vd + 72UL) + 1UL; + v11 = 8UL; + v12 = ((u(*)())v6)(v7, v8, v9, v10, v11); + v12; + vs = *(u*)(*(u*)(vctx + 8UL) + *(u*)(vd + 72UL) * 8UL); + if (!vs) goto b17; + v13 = 0UL; +b18: if (!v13) goto b15; + v14 = (u)zalloc; + v15 = *(u*)(vctx + 0UL); + v16 = 48UL; + v17 = ((u(*)())v14)(v15, v16); + vs = v17; + *(u*)(vs + 0UL) = *(u*)(vctx + 32UL); + *(u*)(vctx + 32UL) = vs; + *(u*)(vs + 8UL) = 1UL; + *(u*)(*(u*)(vctx + 8UL) + *(u*)(vd + 72UL) * 8UL) = vs; + if ((s)*(u*)(vd + 72UL) < (s)*(u*)(vctx + 16UL)) goto b22; + *(u*)(vctx + 16UL) = *(u*)(vd + 72UL) + 1UL; +b20:b13: v18 = (u)zensure_arr; + v19 = *(u*)(vctx + 0UL); + v20 = vs + 16UL; + v21 = vs + 32UL; + v22 = *(u*)(vs + 24UL) + 1UL; + v23 = 8UL; + v24 = ((u(*)())v18)(v19, v20, v21, v22, v23); + v24; + *(u*)(*(u*)(vs + 16UL) + *(u*)(vs + 24UL) * 8UL) = vd; + *(u*)(vs + 24UL) = *(u*)(vs + 24UL) + 1UL; + vi = 0UL; +b24: if (vi != 256UL) goto b28; + return 0UL; +b28: v25 = (u)zdfamin_setup; + v26 = vctx; + v27 = *(u*)(*(u*)(vd + 40UL) + vi * 8UL); + v28 = ((u(*)())v25)(v26, v27); + v28; + vi = vi + 1UL; + goto b24; +b22: goto b20; +b15: goto b13; +b17: v13 = 1UL; + goto b18; +b5: if (!*(u*)(vd + 64UL)) goto b11; + v4 = 1UL; + goto b7; +b11: v4 = 0UL; + goto b7; +b9: v5 = 1UL; + goto b10; +} +u zdfamin_split(u vctx, u vp, u vs, u vch) { + u vi = 0; + u vk = 0; + u vm = 0; + u vd = 0; + u ve = 0; + u vnew_ind = 0; + u vt = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + u v18 = 0; + u v19 = 0; + u v20 = 0; + u v21 = 0; + u v22 = 0; + u v23 = 0; + u v24 = 0; + u v25 = 0; + u v26 = 0; + u v27 = 0; + u v28 = 0; + u v29 = 0; + vi = 0UL; + vk = 0UL; +b1: if (vi != *(u*)(vp + 24UL)) goto b5; + if (vk != 0UL) goto b17; + v12 = 1UL; +b19: if (!v12) goto b15; + return 0UL; +b15: v13 = (u)zalloc; + v14 = *(u*)(vctx + 0UL); + v15 = 8UL * (*(u*)(vp + 24UL) - vk); + v16 = ((u(*)())v13)(v14, v15); + vnew_ind = v16; + vi = 0UL; + vk = 0UL; + vm = 0UL; +b22: if (vi != *(u*)(vp + 24UL)) goto b26; + *(u*)(vp + 24UL) = vk; + v18 = (u)zensure_arr; + v19 = *(u*)(vctx + 0UL); + v20 = vctx + 8UL; + v21 = vctx + 24UL; + v22 = *(u*)(vctx + 16UL) + 1UL; + v23 = 8UL; + v24 = ((u(*)())v18)(v19, v20, v21, v22, v23); + v24; + v25 = (u)zalloc; + v26 = *(u*)(vctx + 0UL); + v27 = 48UL; + v28 = ((u(*)())v25)(v26, v27); + vt = v28; + *(u*)(vt + 0UL) = *(u*)(vctx + 32UL); + *(u*)(vt + 16UL) = vnew_ind; + *(u*)(vt + 24UL) = vm; + *(u*)(vt + 32UL) = vm; + *(u*)(vctx + 32UL) = vt; + *(u*)(vt + 8UL) = 1UL; + *(u*)(*(u*)(vctx + 8UL) + *(u*)(vctx + 16UL) * 8UL) = vt; + *(u*)(vctx + 16UL) = *(u*)(vctx + 16UL) + 1UL; + if (!*(u*)(vp + 8UL)) goto b40; + v29 = 0UL; +b41: if (!v29) goto b38; + *(u*)(vp + 0UL) = *(u*)(vctx + 32UL); + *(u*)(vctx + 32UL) = vp; + *(u*)(vp + 8UL) = 1UL; +b36: return 0UL; +b38: goto b36; +b40: v29 = 1UL; + goto b41; +b26: vd = *(u*)(*(u*)(vp + 16UL) + vi * 8UL); + ve = *(u*)(*(u*)(vd + 40UL) + vch * 8UL); + if (!ve) goto b31; + if (*(u*)(*(u*)(vctx + 8UL) + (*(u*)(ve + 64UL) - 1UL) * 8UL) != vs) goto b31; + v17 = 1UL; +b33: if (!v17) goto b29; + *(u*)(*(u*)(vp + 16UL) + vk * 8UL) = vd; + vk = vk + 1UL; +b27: vi = vi + 1UL; + goto b22; +b29: *(u*)(vnew_ind + vm * 8UL) = vd; + *(u*)(vd + 64UL) = *(u*)(vctx + 16UL) + 1UL; + vm = vm + 1UL; + goto b27; +b31: v17 = 0UL; + goto b33; +b17: if (vk != *(u*)(vp + 24UL)) goto b20; + v12 = 1UL; + goto b19; +b20: v12 = 0UL; + goto b19; +b5: vd = *(u*)(*(u*)(vp + 16UL) + vi * 8UL); + ve = *(u*)(*(u*)(vd + 40UL) + vch * 8UL); + if (!ve) goto b10; + if (*(u*)(*(u*)(vctx + 8UL) + (*(u*)(ve + 64UL) - 1UL) * 8UL) != vs) goto b10; + v11 = 1UL; +b12: if (!v11) goto b8; + vk = vk + 1UL; +b6: vi = vi + 1UL; + goto b1; +b8: goto b6; +b10: v11 = 0UL; + goto b12; +} u zdie(u vmsg) { u vlen = 0; u v2 = 0; @@ -14136,6 +14463,51 @@ b6: vi = vi + 1UL; b8: v7 = 1UL; goto b9; } +u zensure_arr(u va, u varr, u vcap, u vneed, u velen) { + u vnew_arr = 0; + u vnew_cap = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + u v18 = 0; + u v19 = 0; + vneed = vneed * velen; + vnew_cap = *(u*)vcap; + if ((s)vneed >= (s)vnew_cap) goto b3; + return 0UL; +b3:b4: if ((s)vneed > (s)vnew_cap) goto b8; + v7 = (u)zalloc; + v8 = va; + v9 = vnew_cap; + v10 = ((u(*)())v7)(v8, v9); + vnew_arr = v10; + if (!*(u*)vcap) goto b12; + v11 = (u)zmemcpy; + v12 = vnew_arr; + v13 = *(u*)varr; + v14 = *(u*)vcap; + v15 = ((u(*)())v11)(v12, v13, v14); + v15; + v16 = (u)zfree; + v17 = va; + v18 = *(u*)varr; + v19 = ((u(*)())v16)(v17, v18); + v19; +b10: *(u*)varr = vnew_arr; + *(u*)vcap = vnew_cap; + return 0UL; +b12: goto b10; +b8: vnew_cap = vnew_cap * 2UL + 16UL; + goto b4; +} u zenter(u vc, u vtag) { u v2 = 0; u v3 = 0; @@ -23312,7 +23684,8 @@ u zlexer_compile_rule(u vc, u vpn) { v11 = vc; v12 = ((u(*)())v10)(v11); vb = v12; - *(u*)(vb + 8UL) = 1UL; + *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vb + 8UL) = *(u*)(vc + 112UL); v13 = (u)znfa_concat; v14 = vc; v15 = va; @@ -23485,6 +23858,10 @@ u zlexer_explode(u vc, u vn) { u v17 = 0; u v18 = 0; u v19 = 0; + u v20 = 0; + u v21 = 0; + u v22 = 0; + u v23 = 0; vk = (u)v_k; *(u*)(vk + 0UL) = *(u*)(vc + 0UL); if (*(u*)(vn + 16UL) == -1UL) goto b3; @@ -23507,6 +23884,11 @@ b1: v8 = (u)zdfakey_add; v18 = *(u*)(vk + 8UL); v19 = ((u(*)())v16)(v17, v18); v19; + v20 = (u)zdfa_minimize; + v21 = vc; + v22 = vret; + v23 = ((u(*)())v20)(v21, v22); + vret = v23; return vret; b3: goto b1; } @@ -24816,7 +25198,7 @@ b3: v8 = (u)zdfakey_sort; v10 = ((u(*)())v8)(v9); v10; vp = 0UL; - vlink = vc + 128UL; + vlink = vc + 136UL; b5: vd = *(u*)vlink; if (!vd) goto b11; v11 = 0UL; @@ -24837,13 +25219,13 @@ b12: if (!v11) goto b9; v27 = ((u(*)())v24)(v25, v26); *(u*)(vd + 48UL) = v27; *(u*)(vd + 56UL) = *(u*)(vk + 16UL); - *(u*)(vd + 32UL) = *(u*)(vc + 120UL); - *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; + *(u*)(vd + 32UL) = *(u*)(vc + 128UL); + *(u*)(vc + 128UL) = *(u*)(vc + 128UL) + 1UL; vi = 0UL; b22: if (vi != *(u*)(vk + 16UL)) goto b26; *(u*)(vd + 72UL) = *(u*)(vk + 32UL); v28 = (u)zrb_link; - v29 = vc + 128UL; + v29 = vc + 136UL; v30 = vlink; v31 = vp; v32 = vd + 0UL; @@ -24957,8 +25339,8 @@ b9: v9 = (u)zalloc; v11 = 72UL; v12 = ((u(*)())v9)(v10, v11); vn = v12; - *(u*)(vn + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vn + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vn + 16UL) = -1UL; *(u*)(vn + 24UL) = -1UL; *(u*)(vn + 32UL) = va; @@ -25013,8 +25395,8 @@ u znfa_any(u vc) { v4 = 72UL; v5 = ((u(*)())v2)(v3, v4); vn = v5; - *(u*)(vn + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vn + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vn + 32UL) = 0UL; *(u*)(vn + 16UL) = 0UL; *(u*)(vn + 24UL) = 255UL; @@ -25076,8 +25458,8 @@ u znfa_empty(u vc) { v4 = 72UL; v5 = ((u(*)())v2)(v3, v4); vn = v5; - *(u*)(vn + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vn + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vn + 16UL) = -1UL; *(u*)(vn + 24UL) = -1UL; *(u*)(vn + 32UL) = 0UL; @@ -25106,8 +25488,8 @@ u znfa_lit(u vc, u vstart, u vend) { v6 = 72UL; v7 = ((u(*)())v4)(v5, v6); vn = v7; - *(u*)(vn + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vn + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vn + 32UL) = 0UL; *(u*)(vn + 16UL) = vstart; *(u*)(vn + 24UL) = vend; @@ -25145,8 +25527,8 @@ b3: v4 = (u)zalloc; v6 = 72UL; v7 = ((u(*)())v4)(v5, v6); vb = v7; - *(u*)(vb + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vb + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vb + 16UL) = -1UL; *(u*)(vb + 24UL) = -1UL; *(u*)(vb + 32UL) = 0UL; @@ -25198,8 +25580,8 @@ b3: v8 = (u)zalloc; v10 = 72UL; v11 = ((u(*)())v8)(v9, v10); vb = v11; - *(u*)(vb + 0UL) = *(u*)(vc + 112UL); - *(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL; + *(u*)(vb + 0UL) = *(u*)(vc + 120UL); + *(u*)(vc + 120UL) = *(u*)(vc + 120UL) + 1UL; *(u*)(vb + 16UL) = -1UL; *(u*)(vb + 24UL) = -1UL; *(u*)(vb + 32UL) = 0UL; diff --git a/cc1.om b/cc1.om @@ -32,6 +32,7 @@ struct compiler { // Usage stack used_top: *decl; + lex_id: int; nfa_id: int; dfa_id: int; dfa_cache: *rbnode; diff --git a/lexer.om b/lexer.om @@ -220,7 +220,8 @@ func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa { a = lexer_compile_pattern(c, pat); b = nfa_empty(c); - b.tag = 1; + c.lex_id = c.lex_id + 1; + b.tag = c.lex_id; return nfa_concat(c, a, b); } @@ -649,6 +650,8 @@ func lexer_explode(c: *compiler, n: *nfa): *dfa { free(k.a, k.live as *byte); + ret = dfa_minimize(c, ret); + return ret; } @@ -785,7 +788,11 @@ func dfa_show2(d: *dfa) { fputd(nil, d.id); if d.tag != 0 { - fputs(nil, "[shape=doublecircle]"); + fputs(nil, "[label=\""); + fputd(nil, d.tag); + fputs(nil, "\" shape=doublecircle]"); + } else { + fputs(nil, "[label=\"\"]"); } fputs(nil, ";\n"); @@ -838,3 +845,215 @@ func dfa_show2(d: *dfa) { dfa_show2(o); } } + +struct dfamin { + a: *alloc; + sets: **dfaset; + nsets: int; + capsets: int; + queue: *dfaset; +} + +struct dfaset { + next: *dfaset; + inqueue: int; + ind: **dfa; + ind_len: int; + ind_cap: int; + node: *dfa; +} + +func dfa_minimize(c: *compiler, d: *dfa): *dfa { + var _ctx: dfamin; + var ctx: *dfamin; + ctx = &_ctx; + ctx.a = c.a; + dfamin_setup(ctx, d); + dfamin_refine(ctx); + return dfamin_extract(ctx, d); +} + +func dfamin_setup(ctx: *dfamin, d: *dfa) { + var i: int; + var s: *dfaset; + + if !d || d.mark { + return; + } + d.mark = d.tag + 1; + + // Split states classes based on the accepting tag. + ensure_arr(ctx.a, (&ctx.sets) as **void, &ctx.capsets, d.tag + 1, sizeof(*ctx.sets)); + s = ctx.sets[d.tag]; + if !s { + s = alloc(ctx.a, sizeof(*s)) as *dfaset; + s.next = ctx.queue; + ctx.queue = s; + s.inqueue = 1; + ctx.sets[d.tag] = s; + if d.tag >= ctx.nsets { + ctx.nsets = d.tag + 1; + } + } + + ensure_arr(ctx.a, (&s.ind) as **void, &s.ind_cap, s.ind_len + 1, sizeof(*s.ind)); + s.ind[s.ind_len] = d; + s.ind_len = s.ind_len + 1; + + i = 0; + loop { + if i == 256 { + break; + } + dfamin_setup(ctx, d.link[i]); + i = i + 1; + } +} + +func dfamin_refine(ctx: *dfamin) { + var s: *dfaset; + var i: int; + var p: *dfaset; + var ch: int; + loop { + // Remove a set s from the queue + s = ctx.queue; + if !s { + break; + } + ctx.queue = s.next; + s.inqueue = 0; + + i = 0; + loop { + // For each set p in the partition + if i == ctx.nsets { + break; + } + p = ctx.sets[i]; + + // For each letter of the alphabet + ch = 0; + loop { + if ch == 256 { + break; + } + dfamin_split(ctx, p, s, ch); + ch = ch + 1; + } + + i = i + 1; + } + } +} + +func dfamin_split(ctx: *dfamin, p: *dfaset, s: *dfaset, ch: int) { + var i: int; + var k: int; + var m: int; + var d: *dfa; + var e: *dfa; + var new_ind: **dfa; + var t: *dfaset; + + // Count k the states in p that lead to s on ch + i = 0; + k = 0; + loop { + if i == p.ind_len { + break; + } + d = p.ind[i]; + + e = d.link[ch]; + if e && ctx.sets[e.mark - 1] == s { + k = k + 1; + } + + i = i + 1; + } + + if k == 0 || k == p.ind_len { + return; + } + + new_ind = alloc(ctx.a, sizeof(*new_ind) * (p.ind_len - k)) as **dfa; + + // Split nodes into two sets + i = 0; + k = 0; + m = 0; + loop { + if i == p.ind_len { + break; + } + d = p.ind[i]; + + e = d.link[ch]; + if e && ctx.sets[e.mark - 1] == s { + p.ind[k] = d; + k = k + 1; + } else { + new_ind[m] = d; + d.mark = ctx.nsets + 1; + m = m + 1; + } + + i = i + 1; + } + p.ind_len = k; + + // Create a new set from the nodes that don't lead to p + ensure_arr(ctx.a, (&ctx.sets) as **void, &ctx.capsets, ctx.nsets + 1, sizeof(*ctx.sets)); + t = alloc(ctx.a, sizeof(*t)) as *dfaset; + t.next = ctx.queue; + t.ind = new_ind; + t.ind_len = m; + t.ind_cap = m; + ctx.queue = t; + t.inqueue = 1; + ctx.sets[ctx.nsets] = t; + ctx.nsets = ctx.nsets + 1; + + // Queue up p + if !p.inqueue { + p.next = ctx.queue; + ctx.queue = p; + p.inqueue = 1; + } +} + +func dfamin_extract(ctx: *dfamin, d: *dfa): *dfa { + var s: *dfaset; + var g: *dfa; + var ch: int; + + if !d { + return nil; + } + + s = ctx.sets[d.mark - 1]; + if s.node { + return s.node; + } + + g = alloc(ctx.a, sizeof(*g)) as *dfa; + g.id = d.mark - 1; + g.tag = d.tag; + g.link = alloc(ctx.a, sizeof(*g.link) * 256) as **dfa; + + s.node = g; + + ch = 0; + loop { + if ch == 256 { + break; + } + if d.link[ch] != nil { + g.link[ch] = dfamin_extract(ctx, d.link[ch]); + } + ch = ch + 1; + } + + return g; +}