os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit f7e4ba08197ae732fe1e78fd7889e76e1eed60ac
parent d600475997f93d49b214fa142b9c66dc22d9bdb1
Author: erai <erai@omiltem.net>
Date:   Wed,  9 Apr 2025 18:24:01 +0000

Generate state tables

Diffstat:
Mbootstrap.sh | 2+-
Mcc0.c | 433++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mcc1.om | 10++++++++--
Mircout.om | 4++++
Mlexer.om | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alexlib.om | 2++
6 files changed, 484 insertions(+), 83 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh @@ -2,7 +2,7 @@ BOOTSTRAP="cc0.c" LIBS="bufio.om lib.om alloc.om syscall.om" -SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om" +SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om lexlib.om" # Build the bootstrap compiler from c [ cc0 -nt cc0.c ] || ${CC:-gcc} -O1 -g ${BOOTSTRAP} -o cc0 diff --git a/cc0.c b/cc0.c @@ -144,6 +144,8 @@ u zlalr_compiler(); u zlayout_struct(); u zlayout_union(); u zleave(); +u zlexer_codegen(); +u zlexer_codegen_setup(); u zlexer_compile(); u zlexer_compile_alternative(); u zlexer_compile_any(); @@ -154,6 +156,7 @@ u zlexer_compile_rule(); u zlexer_compile_str(); u zlexer_compile_suffix(); u zlexer_explode(); +u zlexer_output(); u zliteral(); u zlocals_to_ir(); u zmain(); @@ -3645,6 +3648,7 @@ u zdefextern(u vc, u vn) { u v16 = 0; u v17 = 0; u v18 = 0; + u v19 = 0; *(u*)(vc + 24UL) = *(u*)(vn + 24UL); *(u*)(vc + 32UL) = *(u*)(vn + 32UL); *(u*)(vc + 40UL) = *(u*)(vn + 40UL); @@ -3662,15 +3666,16 @@ u zdefextern(u vc, u vn) { v14 = ((u(*)())v9)(v10, v11, v12, v13); vd = v14; if (!*(u*)(vd + 72UL)) goto b5; - v15 = (u)zcdie; + v15 = (u)zunify; v16 = vc; - v17 = (u)"duplicate function"; - v18 = ((u(*)())v15)(v16, v17); - v18; -b3: *(u*)(vd + 72UL) = 1UL; + v17 = *(u*)(vd + 80UL); + v18 = vt; + v19 = ((u(*)())v15)(v16, v17, v18); + v19; + return vd; +b5: *(u*)(vd + 72UL) = 1UL; *(u*)(vd + 80UL) = vt; return vd; -b5: goto b3; } u zdefine_enum_tag(u vc, u vname, u vvalue) { u vd = 0; @@ -3873,53 +3878,63 @@ u zdefun(u vc, u vfuncdef) { u v28 = 0; u v29 = 0; u v30 = 0; + u v31 = 0; + u v32 = 0; + u v33 = 0; v7 = (u)zdefextern; v8 = vc; v9 = *(u*)(vfuncdef + 8UL); v10 = ((u(*)())v7)(v8, v9); vd = v10; + if (*(u*)(vd + 72UL) != 2UL) goto b4; + v11 = (u)zdie; + v12 = (u)"duplicate function"; + v13 = ((u(*)())v11)(v12); + v13; +b2: *(u*)(vd + 72UL) = 2UL; vn = *(u*)(*(u*)(*(u*)(vfuncdef + 8UL) + 16UL) + 8UL); -b2: if (!vn) goto b8; - v11 = 0UL; -b9: if (!v11) goto b6; - v26 = (u)zhoist_locals; - v27 = vc; - v28 = vd; - v29 = *(u*)(vfuncdef + 16UL); - v30 = ((u(*)())v26)(v27, v28, v29); - v30; +b6: if (!vn) goto b12; + v14 = 0UL; +b13: if (!v14) goto b10; + v29 = (u)zhoist_locals; + v30 = vc; + v31 = vd; + v32 = *(u*)(vfuncdef + 16UL); + v33 = ((u(*)())v29)(v30, v31, v32); + v33; return 0UL; -b6: *(u*)(vc + 24UL) = *(u*)(vn + 24UL); +b10: *(u*)(vc + 24UL) = *(u*)(vn + 24UL); *(u*)(vc + 32UL) = *(u*)(vn + 32UL); *(u*)(vc + 40UL) = *(u*)(vn + 40UL); *(u*)(*(u*)(vc + 48UL) + 88UL) = *(u*)(vn + 24UL); *(u*)(*(u*)(vc + 48UL) + 96UL) = *(u*)(vn + 32UL); vname = *(u*)(*(u*)(*(u*)(vn + 8UL) + 8UL) + 56UL); - v12 = (u)zprototype; - v13 = vc; - v14 = *(u*)(*(u*)(vn + 8UL) + 16UL); - v15 = ((u(*)())v12)(v13, v14); - vt = v15; - v16 = (u)zfind; - v17 = vc; - v18 = *(u*)(vd + 32UL); - v19 = vname; - v20 = 1UL; - v21 = ((u(*)())v16)(v17, v18, v19, v20); - vv = v21; - if (!*(u*)(vv + 192UL)) goto b14; - v22 = (u)zcdie; - v23 = vc; - v24 = (u)"duplicate argument"; - v25 = ((u(*)())v22)(v23, v24); - v25; -b12: *(u*)(vv + 192UL) = 1UL; + v15 = (u)zprototype; + v16 = vc; + v17 = *(u*)(*(u*)(vn + 8UL) + 16UL); + v18 = ((u(*)())v15)(v16, v17); + vt = v18; + v19 = (u)zfind; + v20 = vc; + v21 = *(u*)(vd + 32UL); + v22 = vname; + v23 = 1UL; + v24 = ((u(*)())v19)(v20, v21, v22, v23); + vv = v24; + if (!*(u*)(vv + 192UL)) goto b18; + v25 = (u)zcdie; + v26 = vc; + v27 = (u)"duplicate argument"; + v28 = ((u(*)())v25)(v26, v27); + v28; +b16: *(u*)(vv + 192UL) = 1UL; *(u*)(vv + 200UL) = vt; vn = *(u*)(vn + 16UL); - goto b2; -b14: goto b12; -b8: v11 = 1UL; - goto b9; + goto b6; +b18: goto b16; +b12: v14 = 1UL; + goto b13; +b4: goto b2; } u zdefunion(u vc, u vn) { u vname = 0; @@ -21073,6 +21088,10 @@ u zircstr(u vc, u vs, u vn) { u v33 = 0; u v34 = 0; u v35 = 0; + u v36 = 0; + u v37 = 0; + u v38 = 0; + u v39 = 0; vi = 0UL; v5 = (u)zfputs; v6 = *(u*)(vc + 72UL); @@ -21080,63 +21099,70 @@ u zircstr(u vc, u vs, u vn) { v8 = ((u(*)())v5)(v6, v7); v8; b2: if (vi != vn) goto b6; - v32 = (u)zfputs; - v33 = *(u*)(vc + 72UL); - v34 = (u)"\042"; - v35 = ((u(*)())v32)(v33, v34); - v35; + v36 = (u)zfputs; + v37 = *(u*)(vc + 72UL); + v38 = (u)"\042"; + v39 = ((u(*)())v36)(v37, v38); + v39; return 0UL; -b6: vch = (u)*(b*)(vs + vi * 1UL); - if ((s)vch >= (s)32UL) goto b11; - v9 = 1UL; -b13: if (!v9) goto b9; - v12 = (u)zfputc; - v13 = *(u*)(vc + 72UL); - v14 = 92UL; - v15 = ((u(*)())v12)(v13, v14); - v15; +b6: if ((u)((s)vi % (s)100UL) != 99UL) goto b9; + v9 = (u)zfputs; + v10 = *(u*)(vc + 72UL); + v11 = (u)"\042\012\011\011\042"; + v12 = ((u(*)())v9)(v10, v11); + v12; +b7: vch = (u)*(b*)(vs + vi * 1UL); + if ((s)vch >= (s)32UL) goto b15; + v13 = 1UL; +b17: if (!v13) goto b13; v16 = (u)zfputc; v17 = *(u*)(vc + 72UL); - v18 = 48UL + (vch >> 6UL & 7UL); + v18 = 92UL; v19 = ((u(*)())v16)(v17, v18); v19; v20 = (u)zfputc; v21 = *(u*)(vc + 72UL); - v22 = 48UL + (vch >> 3UL & 7UL); + v22 = 48UL + (vch >> 6UL & 7UL); v23 = ((u(*)())v20)(v21, v22); v23; v24 = (u)zfputc; v25 = *(u*)(vc + 72UL); - v26 = 48UL + (vch & 7UL); + v26 = 48UL + (vch >> 3UL & 7UL); v27 = ((u(*)())v24)(v25, v26); v27; -b7: vi = vi + 1UL; - goto b2; -b9: v28 = (u)zfputc; + v28 = (u)zfputc; v29 = *(u*)(vc + 72UL); - v30 = vch; + v30 = 48UL + (vch & 7UL); v31 = ((u(*)())v28)(v29, v30); v31; - goto b7; -b11: if ((s)vch <= (s)127UL) goto b16; - v10 = 1UL; -b18: if (!v10) goto b14; - v9 = 1UL; - goto b13; -b14: v9 = 0UL; - goto b13; -b16: if (vch != 92UL) goto b21; - v11 = 1UL; -b23: if (!v11) goto b19; - v10 = 1UL; - goto b18; -b19: v10 = 0UL; - goto b18; -b21: if (vch != 34UL) goto b24; - v11 = 1UL; - goto b23; -b24: v11 = 0UL; - goto b23; +b11: vi = vi + 1UL; + goto b2; +b13: v32 = (u)zfputc; + v33 = *(u*)(vc + 72UL); + v34 = vch; + v35 = ((u(*)())v32)(v33, v34); + v35; + goto b11; +b15: if ((s)vch <= (s)127UL) goto b20; + v14 = 1UL; +b22: if (!v14) goto b18; + v13 = 1UL; + goto b17; +b18: v13 = 0UL; + goto b17; +b20: if (vch != 92UL) goto b25; + v15 = 1UL; +b27: if (!v15) goto b23; + v14 = 1UL; + goto b22; +b23: v14 = 0UL; + goto b22; +b25: if (vch != 34UL) goto b28; + v15 = 1UL; + goto b27; +b28: v15 = 0UL; + goto b27; +b9: goto b7; } u zircuse(u vc, u vic, u vib) { u vi = 0; @@ -23295,6 +23321,124 @@ b7: *(u*)(vc + 168UL) = *(u*)(vc + 168UL) * 2UL; goto b5; b4: goto b2; } +u zlexer_codegen(u vc, u vd) { + u v_cg[8] = {0}; + u vcg = 0; + u vi = 0; + u vch = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + u v18 = 0; + u v19 = 0; + u v20 = 0; + u v21 = 0; + vcg = (u)v_cg; + *(u*)(vcg + 0UL) = *(u*)(vc + 0UL); + v6 = (u)zlexer_codegen_setup; + v7 = vcg; + v8 = vd; + v9 = ((u(*)())v6)(v7, v8); + v9; + *(u*)(vcg + 40UL) = (*(u*)(vcg + 16UL) + 1UL) * 8UL; + v10 = (u)zalloc; + v11 = *(u*)(vcg + 0UL); + v12 = *(u*)(vcg + 40UL) + 1UL; + v13 = ((u(*)())v10)(v11, v12); + *(u*)(vcg + 32UL) = v13; + *(u*)(*(u*)(vcg + 32UL) + *(u*)(vcg + 16UL) * 8UL) = -1UL; + *(u*)(vcg + 56UL) = *(u*)(vcg + 16UL) * 256UL * 8UL; + v14 = (u)zalloc; + v15 = *(u*)(vcg + 0UL); + v16 = *(u*)(vcg + 56UL) + 1UL; + v17 = ((u(*)())v14)(v15, v16); + *(u*)(vcg + 48UL) = v17; + vi = 0UL; +b4: if (vi != *(u*)(vcg + 16UL)) goto b8; + vi = 0UL; +b9: if (vi != *(u*)(vcg + 16UL)) goto b13; + v18 = (u)zlexer_output; + v19 = vc; + v20 = vcg; + v21 = ((u(*)())v18)(v19, v20); + v21; + return 0UL; +b13: vd = *(u*)(*(u*)(vcg + 8UL) + vi * 8UL); + *(u*)(*(u*)(vcg + 32UL) + vi * 8UL) = *(u*)(vd + 72UL); + vch = 0UL; +b14: if (vch != 256UL) goto b18; + vi = vi + 1UL; + goto b9; +b18: if (!*(u*)(*(u*)(vd + 40UL) + vch * 8UL)) goto b21; + *(u*)(*(u*)(vcg + 48UL) + (vi * 256UL + vch) * 8UL) = *(u*)(*(u*)(*(u*)(vd + 40UL) + vch * 8UL) + 64UL); +b19: vch = vch + 1UL; + goto b14; +b21: *(u*)(*(u*)(vcg + 48UL) + (vi * 256UL + vch) * 8UL) = -1UL; + goto b19; +b8: vd = *(u*)(*(u*)(vcg + 8UL) + vi * 8UL); + *(u*)(vd + 64UL) = vi; + vi = vi + 1UL; + goto b4; +} +u zlexer_codegen_setup(u vcg, u vd) { + u vi = 0; + u v3 = 0; + u v4 = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + if (!vd) goto b9; + v4 = 0UL; +b10: if (!v4) goto b5; + v3 = 1UL; +b7: if (!v3) goto b3; + return 0UL; +b3: *(u*)(vd + 64UL) = 1UL; + v5 = (u)zensure_arr; + v6 = *(u*)(vcg + 0UL); + v7 = vcg + 8UL; + v8 = vcg + 24UL; + v9 = *(u*)(vcg + 16UL) + 1UL; + v10 = 8UL; + v11 = ((u(*)())v5)(v6, v7, v8, v9, v10); + v11; + *(u*)(*(u*)(vcg + 8UL) + *(u*)(vcg + 16UL) * 8UL) = vd; + *(u*)(vcg + 16UL) = *(u*)(vcg + 16UL) + 1UL; + vi = 0UL; +b13: if (vi != 256UL) goto b17; + return 0UL; +b17: v12 = (u)zlexer_codegen_setup; + v13 = vcg; + v14 = *(u*)(*(u*)(vd + 40UL) + vi * 8UL); + v15 = ((u(*)())v12)(v13, v14); + v15; + vi = vi + 1UL; + goto b13; +b5: if (!*(u*)(vd + 64UL)) goto b11; + v3 = 1UL; + goto b7; +b11: v3 = 0UL; + goto b7; +b9: v4 = 1UL; + goto b10; +} u zlexer_compile(u vc, u vpn, u verr) { u va = 0; u vb = 0; @@ -23313,6 +23457,10 @@ u zlexer_compile(u vc, u vpn, u verr) { u v17 = 0; u v18 = 0; u v19 = 0; + u v20 = 0; + u v21 = 0; + u v22 = 0; + u v23 = 0; vpn = *(u*)(vpn + 16UL); b1: if (!vpn) goto b7; v6 = 0UL; @@ -23322,6 +23470,11 @@ b8: if (!v6) goto b5; v18 = va; v19 = ((u(*)())v16)(v17, v18); vd = v19; + v20 = (u)zlexer_codegen; + v21 = vc; + v22 = vd; + v23 = ((u(*)())v20)(v21, v22); + v23; return 0UL; b5: v7 = (u)zlexer_compile_rule; v8 = vc; @@ -23926,6 +24079,126 @@ b1: v8 = (u)zdfakey_add; return vret; b3: goto b1; } +u zlexer_output(u vc, u vcg) { + u vic = 0; + u vo = 0; + u vret_type = 0; + u vfunc_type = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + u v9 = 0; + u v10 = 0; + u v11 = 0; + u v12 = 0; + u v13 = 0; + u v14 = 0; + u v15 = 0; + u v16 = 0; + u v17 = 0; + u v18 = 0; + u v19 = 0; + u v20 = 0; + u v21 = 0; + u v22 = 0; + u v23 = 0; + u v24 = 0; + u v25 = 0; + u v26 = 0; + u v27 = 0; + u v28 = 0; + u v29 = 0; + u v30 = 0; + u v31 = 0; + u v32 = 0; + u v33 = 0; + u v34 = 0; + u v35 = 0; + u v36 = 0; + u v37 = 0; + u v38 = 0; + u v39 = 0; + u v40 = 0; + u v41 = 0; + u v42 = 0; + u v43 = 0; + u v44 = 0; + u v45 = 0; + u v46 = 0; + u v47 = 0; + u v48 = 0; + u v49 = 0; + u v50 = 0; + u v51 = 0; + u v52 = 0; + u v53 = 0; + u v54 = 0; + u v55 = 0; + u v56 = 0; + v6 = (u)zmktype0; + v7 = vc; + v8 = 2UL; + v9 = ((u(*)())v6)(v7, v8); + vret_type = v9; + v10 = (u)zmktype1; + v11 = vc; + v12 = 4UL; + v13 = vret_type; + v14 = ((u(*)())v10)(v11, v12, v13); + vret_type = v14; + v15 = (u)zmktype2; + v16 = vc; + v17 = 6UL; + v18 = vret_type; + v19 = 0UL; + v20 = ((u(*)())v15)(v16, v17, v18, v19); + vfunc_type = v20; + v21 = (u)zmkirfunc; + v22 = vc; + v23 = (u)"get_tag_table"; + v24 = ((u(*)())v21)(v22, v23); + vic = v24; + v25 = (u)zmkirstr; + v26 = vic; + v27 = *(u*)(vcg + 32UL); + v28 = *(u*)(vcg + 40UL); + v29 = ((u(*)())v25)(v26, v27, v28); + vo = v29; + v30 = (u)zirreturn; + v31 = vic; + v32 = vo; + v33 = ((u(*)())v30)(v31, v32); + v33; + v34 = (u)zdefine_ir_func; + v35 = vc; + v36 = vic; + v37 = vfunc_type; + v38 = ((u(*)())v34)(v35, v36, v37); + v38; + v39 = (u)zmkirfunc; + v40 = vc; + v41 = (u)"get_link_table"; + v42 = ((u(*)())v39)(v40, v41); + vic = v42; + v43 = (u)zmkirstr; + v44 = vic; + v45 = *(u*)(vcg + 48UL); + v46 = *(u*)(vcg + 56UL); + v47 = ((u(*)())v43)(v44, v45, v46); + vo = v47; + v48 = (u)zirreturn; + v49 = vic; + v50 = vo; + v51 = ((u(*)())v48)(v49, v50); + v51; + v52 = (u)zdefine_ir_func; + v53 = vc; + v54 = vic; + v55 = vfunc_type; + v56 = ((u(*)())v52)(v53, v54, v55); + v56; + return 0UL; +} u zliteral(u vc, u vs) { u vi = 0; u vch = 0; diff --git a/cc1.om b/cc1.om @@ -358,8 +358,9 @@ func defextern(c: *compiler, n: *node): *decl { d = find(c, name, nil, 1); - if (d.func_defined) { - cdie(c, "duplicate function"); + if d.func_defined { + unify(c, d.func_type, t); + return d; } d.func_defined = 1; @@ -377,6 +378,11 @@ func defun(c: *compiler, funcdef: *node) { d = defextern(c, funcdef.a); + if d.func_defined == 2 { + die("duplicate function"); + } + d.func_defined = 2; + n = funcdef.a.b.a; loop { if (!n) { diff --git a/ircout.om b/ircout.om @@ -339,6 +339,10 @@ func ircstr(c: *compiler, s: *byte, n: int) { break; } + if i % 100 == 99 { + fputs(c.cout, "\"\n\t\t\""); + } + ch = s[i] as int; if ch < 32 || ch > 127 || ch == '\\' || ch == '"' { diff --git a/lexer.om b/lexer.om @@ -208,6 +208,8 @@ func lexer_compile(c: *compiler, pn: *peg_node, err: *file) { } d = lexer_explode(c, a); + + lexer_codegen(c, d); } func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa { @@ -1065,3 +1067,117 @@ func dfamin_extract(ctx: *dfamin, d: *dfa): *dfa { return g; } + +struct dfa_codegen { + a: *alloc; + states: **dfa; + states_len: int; + states_cap: int; + tag: *int; + tag_len: int; + link: *int; + link_len: int; +} + +func lexer_codegen(c: *compiler, d: *dfa) { + var _cg: dfa_codegen; + var cg: *dfa_codegen; + var i: int; + var ch: int; + cg = &_cg; + cg.a = c.a; + + lexer_codegen_setup(cg, d); + + cg.tag_len = (cg.states_len + 1) * sizeof(*cg.tag); + cg.tag = alloc(cg.a, cg.tag_len + 1) as *int; + cg.tag[cg.states_len] = -1; + + cg.link_len = cg.states_len * 256 * sizeof(*cg.link); + cg.link = alloc(cg.a, cg.link_len + 1) as *int; + + i = 0; + loop { + if i == cg.states_len { + break; + } + + d = cg.states[i]; + d.mark = i; + + i = i + 1; + } + + i = 0; + loop { + if i == cg.states_len { + break; + } + + d = cg.states[i]; + cg.tag[i] = d.tag; + + ch = 0; + loop { + if ch == 256 { + break; + } + + if d.link[ch] { + cg.link[i * 256 + ch] = d.link[ch].mark; + } else { + cg.link[i * 256 + ch] = -1; + } + + ch = ch + 1; + } + + i = i + 1; + } + + lexer_output(c, cg); +} + +func lexer_codegen_setup(cg: *dfa_codegen, d: *dfa) { + var i: int; + + if !d || d.mark { + return; + } + d.mark = 1; + + ensure_arr(cg.a, (&cg.states) as **void, &cg.states_cap, cg.states_len + 1, sizeof(*cg.states)); + cg.states[cg.states_len] = d; + cg.states_len = cg.states_len + 1; + + i = 0; + loop { + if i == 256 { + break; + } + lexer_codegen_setup(cg, d.link[i]); + i = i + 1; + } +} + +func lexer_output(c: *compiler, cg: *dfa_codegen) { + var ic: *irfunc; + var o: *irop; + var ret_type: *type; + var func_type: *type; + + ret_type = mktype0(c, TY_INT); + ret_type = mktype1(c, TY_PTR, ret_type); + + func_type = mktype2(c, TY_FUNC, ret_type, nil); + + ic = mkirfunc(c, "get_tag_table"); + o = mkirstr(ic, cg.tag as *byte, cg.tag_len); + irreturn(ic, o); + define_ir_func(c, ic, func_type); + + ic = mkirfunc(c, "get_link_table"); + o = mkirstr(ic, cg.link as *byte, cg.link_len); + irreturn(ic, o); + define_ir_func(c, ic, func_type); +} diff --git a/lexlib.om b/lexlib.om @@ -0,0 +1,2 @@ +func get_tag_table(): *int; +func get_link_table(): *int;