os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit deeffcab331bb7e1e2fd0d18c92fe3662fc39e69
parent 7dbbe8ac0fb1c48750e5766fe27de673613347c8
Author: erai <erai@omiltem.net>
Date:   Wed,  9 Apr 2025 20:49:39 -0400

move lexlib into lexer

Diffstat:
M bootstrap.sh | 2 +-
M cc0.c | 181 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M cc4.om | 180 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
D dump.om | 31 -------------------------------
M lexer.om | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lexlib.om | 197 -------------------------------------------------------------------------------
6 files changed, 385 insertions(+), 406 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh @@ -2,7 +2,7 @@ BOOTSTRAP="cc0.c" LIBS="bufio.om lib.om alloc.om syscall.om" -SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om lexlib.om" +SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om" # Build the bootstrap compiler from c [ cc0 -nt cc0.c ] || ${CC:-gcc} -O1 -g ${BOOTSTRAP} -o cc0 diff --git a/cc0.c b/cc0.c @@ -4659,24 +4659,20 @@ b8: v20 = (u)zgettok; v22 = ((u(*)())v20)(v21); vtok = v22; if (vtok != -1UL) goto b13; - if (!*(u*)(vl + 40UL)) goto b29; + if (!*(u*)(vl + 48UL)) goto b23; v43 = 0UL; -b30: if (!v43) goto b27; +b24: if (!v43) goto b21; v44 = (u)zdie; v45 = (u)"problem"; v46 = ((u(*)())v44)(v45); v46; -b25: return 0UL; -b27: goto b25; -b29: v43 = 1UL; - goto b30; -b13: if (vtok != 59UL) goto b16; - goto b8; -b16: if (vtok != 60UL) goto b19; - goto b8; -b19: v23 = (u)zfputs; +b19: return 0UL; +b21: goto b19; +b23: v43 = 1UL; + goto b24; +b13: v23 = (u)zfputs; v24 = 0UL; - v26 = (u)zget_tag_name; + v26 = *(u*)(vl + 32UL); v27 = vtok; v28 = ((u(*)())v26)(v27); v25 = v28; @@ -4689,8 +4685,8 @@ b19: v23 = (u)zfputs; v33; v34 = (u)zfputb; v35 = 0UL; - v36 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL; - v37 = *(u*)(vl + 72UL) - *(u*)(vl + 64UL); + v36 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL; + v37 = *(u*)(vl + 80UL) - *(u*)(vl + 72UL); v38 = ((u(*)())v34)(v35, v36, v37); v38; v39 = (u)zfputs; @@ -19990,93 +19986,103 @@ u zgettok(u vl) { u v32 = 0; u v33 = 0; u v34 = 0; - vptr = *(u*)(vl + 72UL); - vlineno = *(u*)(vl + 120UL); - vcolno = *(u*)(vl + 128UL); - *(u*)(vl + 64UL) = vptr; - *(u*)(vl + 104UL) = vlineno; - *(u*)(vl + 112UL) = vcolno; - *(u*)(vl + 96UL) = -1UL; - *(u*)(vl + 120UL) = vlineno; - *(u*)(vl + 128UL) = vcolno; + u v35 = 0; +b1: vptr = *(u*)(vl + 80UL); + vlineno = *(u*)(vl + 128UL); + vcolno = *(u*)(vl + 136UL); + *(u*)(vl + 72UL) = vptr; + 
*(u*)(vl + 112UL) = vlineno; + *(u*)(vl + 120UL) = vcolno; + *(u*)(vl + 104UL) = -1UL; + *(u*)(vl + 128UL) = vlineno; + *(u*)(vl + 136UL) = vcolno; vstate = 0UL; -b1: if (vptr != *(u*)(vl + 80UL)) goto b5; - if (!*(u*)(vl + 48UL)) goto b8; - if (*(u*)(vl + 64UL) != *(u*)(vl + 80UL)) goto b11; - *(u*)(vl + 40UL) = 1UL; -b9:b2: return *(u*)(vl + 96UL); -b11: goto b9; -b8: if ((s)(*(u*)(vl + 80UL) - *(u*)(vl + 64UL)) < (s)(*(u*)(vl + 88UL) >> 1UL)) goto b14; - *(u*)(vl + 88UL) = *(u*)(vl + 88UL) * 2UL + 256UL; +b3: if (vptr != *(u*)(vl + 88UL)) goto b7; + if (!*(u*)(vl + 56UL)) goto b10; + if (*(u*)(vl + 72UL) != *(u*)(vl + 88UL)) goto b13; + *(u*)(vl + 48UL) = 1UL; +b11:b4: if (*(u*)(vl + 104UL) != 59UL) goto b44; + v35 = 1UL; +b46: if (!v35) goto b42; + goto b1; +b42: return *(u*)(vl + 104UL); +b44: if (*(u*)(vl + 104UL) != 60UL) goto b47; + v35 = 1UL; + goto b46; +b47: v35 = 0UL; + goto b46; +b13: goto b11; +b10: if ((s)(*(u*)(vl + 88UL) - *(u*)(vl + 72UL)) < (s)(*(u*)(vl + 96UL) >> 1UL)) goto b16; + *(u*)(vl + 96UL) = *(u*)(vl + 96UL) * 2UL + 256UL; v9 = (u)zalloc; v10 = *(u*)(vl + 0UL); - v11 = *(u*)(vl + 88UL); + v11 = *(u*)(vl + 96UL); v12 = ((u(*)())v9)(v10, v11); vtmp = v12; v13 = (u)zmemcpy; v14 = vtmp; - v15 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL; - v16 = *(u*)(vl + 80UL) - *(u*)(vl + 64UL); + v15 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL; + v16 = *(u*)(vl + 88UL) - *(u*)(vl + 72UL); v17 = ((u(*)())v13)(v14, v15, v16); v17; v18 = (u)zfree; v19 = *(u*)(vl + 0UL); - v20 = *(u*)(vl + 56UL); + v20 = *(u*)(vl + 64UL); v21 = ((u(*)())v18)(v19, v20); v21; - *(u*)(vl + 56UL) = vtmp; - vptr = vptr - *(u*)(vl + 64UL); - *(u*)(vl + 72UL) = *(u*)(vl + 72UL) - *(u*)(vl + 64UL); - *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 64UL); - *(u*)(vl + 64UL) = 0UL; -b12: v27 = (u)zread; - v28 = *(u*)(vl + 32UL); - v29 = *(u*)(vl + 56UL) + *(u*)(vl + 80UL) * 1UL; - v30 = *(u*)(vl + 88UL) - *(u*)(vl + 80UL); + *(u*)(vl + 64UL) = vtmp; + vptr = vptr - *(u*)(vl + 72UL); + 
*(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 72UL); + *(u*)(vl + 88UL) = *(u*)(vl + 88UL) - *(u*)(vl + 72UL); + *(u*)(vl + 72UL) = 0UL; +b14: v27 = (u)zread; + v28 = *(u*)(vl + 40UL); + v29 = *(u*)(vl + 64UL) + *(u*)(vl + 88UL) * 1UL; + v30 = *(u*)(vl + 96UL) - *(u*)(vl + 88UL); v31 = ((u(*)())v27)(v28, v29, v30); vret = v31; - if ((s)vret >= (s)0UL) goto b24; + if ((s)vret >= (s)0UL) goto b26; v32 = (u)zdie; v33 = (u)"read failed"; v34 = ((u(*)())v32)(v33); v34; -b22: if (vret != 0UL) goto b28; - *(u*)(vl + 48UL) = 1UL; - goto b2; -b28: *(u*)(vl + 80UL) = *(u*)(vl + 80UL) + vret; -b3: vch = (u)*(b*)(*(u*)(vl + 56UL) + vptr * 1UL); +b24: if (vret != 0UL) goto b30; + *(u*)(vl + 56UL) = 1UL; + goto b4; +b30: *(u*)(vl + 88UL) = *(u*)(vl + 88UL) + vret; +b5: vch = (u)*(b*)(*(u*)(vl + 64UL) + vptr * 1UL); vptr = vptr + 1UL; - if (vch != 10UL) goto b31; + if (vch != 10UL) goto b33; vlineno = vlineno + 1UL; vcolno = 1UL; -b29: vstate = *(u*)(*(u*)(vl + 24UL) + (vstate * 256UL + vch) * 8UL); - if (vstate != -1UL) goto b34; - goto b2; -b34: vtag = *(u*)(*(u*)(vl + 16UL) + vstate * 8UL); - if (!vtag) goto b37; - *(u*)(vl + 96UL) = vtag; - *(u*)(vl + 72UL) = vptr; - *(u*)(vl + 120UL) = vlineno; - *(u*)(vl + 128UL) = vcolno; -b35: goto b1; -b37: goto b35; -b31: vcolno = vcolno + 1UL; - goto b29; -b24: goto b22; -b14: if ((s)*(u*)(vl + 80UL) < (s)(*(u*)(vl + 88UL) >> 1UL)) goto b19; +b31: vstate = *(u*)(*(u*)(vl + 24UL) + (vstate * 256UL + vch) * 8UL); + if (vstate != -1UL) goto b36; + goto b4; +b36: vtag = *(u*)(*(u*)(vl + 16UL) + vstate * 8UL); + if (!vtag) goto b39; + *(u*)(vl + 104UL) = vtag; + *(u*)(vl + 80UL) = vptr; + *(u*)(vl + 128UL) = vlineno; + *(u*)(vl + 136UL) = vcolno; +b37: goto b3; +b39: goto b37; +b33: vcolno = vcolno + 1UL; + goto b31; +b26: goto b24; +b16: if ((s)*(u*)(vl + 88UL) < (s)(*(u*)(vl + 96UL) >> 1UL)) goto b21; v22 = (u)zmemcpy; - v23 = *(u*)(vl + 56UL); - v24 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL; - v25 = *(u*)(vl + 80UL) - *(u*)(vl + 
64UL); + v23 = *(u*)(vl + 64UL); + v24 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL; + v25 = *(u*)(vl + 88UL) - *(u*)(vl + 72UL); v26 = ((u(*)())v22)(v23, v24, v25); v26; - vptr = vptr - *(u*)(vl + 64UL); - *(u*)(vl + 72UL) = *(u*)(vl + 72UL) - *(u*)(vl + 64UL); - *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 64UL); - *(u*)(vl + 64UL) = 0UL; - goto b12; -b19: goto b12; -b5: goto b3; + vptr = vptr - *(u*)(vl + 72UL); + *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 72UL); + *(u*)(vl + 88UL) = *(u*)(vl + 88UL) - *(u*)(vl + 72UL); + *(u*)(vl + 72UL) = 0UL; + goto b14; +b21: goto b14; +b7: goto b5; } u zhex2int(u vs, u vlen, u vok) { u vi = 0; @@ -29548,17 +29554,17 @@ b8: goto b6; b3: goto b1; } u zopen_lex(u vl, u vfd) { - *(u*)(vl + 32UL) = vfd; - *(u*)(vl + 40UL) = 0UL; + *(u*)(vl + 40UL) = vfd; *(u*)(vl + 48UL) = 0UL; - *(u*)(vl + 64UL) = 0UL; + *(u*)(vl + 56UL) = 0UL; *(u*)(vl + 72UL) = 0UL; *(u*)(vl + 80UL) = 0UL; - *(u*)(vl + 96UL) = -1UL; - *(u*)(vl + 104UL) = 1UL; + *(u*)(vl + 88UL) = 0UL; + *(u*)(vl + 104UL) = -1UL; *(u*)(vl + 112UL) = 1UL; *(u*)(vl + 120UL) = 1UL; *(u*)(vl + 128UL) = 1UL; + *(u*)(vl + 136UL) = 1UL; return 0UL; } u zopen_lines_out(u vc, u vfilename) { @@ -39695,7 +39701,7 @@ u zsetup_lex(u va) { u v10 = 0; v3 = (u)zalloc; v4 = va; - v5 = 136UL; + v5 = 144UL; v6 = ((u(*)())v3)(v4, v5); vl = v6; *(u*)(vl + 0UL) = va; @@ -39705,22 +39711,23 @@ u zsetup_lex(u va) { v9 = (u)zget_link_table; v10 = ((u(*)())v9)(); *(u*)(vl + 24UL) = v10; + *(u*)(vl + 32UL) = (u)zget_tag_name; vi = 0UL; b4: if (*(u*)(*(u*)(vl + 16UL) + vi * 8UL) != -1UL) goto b8; *(u*)(vl + 8UL) = vi; - *(u*)(vl + 32UL) = 0UL; - *(u*)(vl + 40UL) = 1UL; + *(u*)(vl + 40UL) = 0UL; *(u*)(vl + 48UL) = 1UL; - *(u*)(vl + 56UL) = 0UL; + *(u*)(vl + 56UL) = 1UL; *(u*)(vl + 64UL) = 0UL; *(u*)(vl + 72UL) = 0UL; *(u*)(vl + 80UL) = 0UL; *(u*)(vl + 88UL) = 0UL; - *(u*)(vl + 96UL) = -1UL; - *(u*)(vl + 104UL) = 1UL; + *(u*)(vl + 96UL) = 0UL; + *(u*)(vl + 104UL) = -1UL; *(u*)(vl + 112UL) = 1UL; 
*(u*)(vl + 120UL) = 1UL; *(u*)(vl + 128UL) = 1UL; + *(u*)(vl + 136UL) = 1UL; return vl; b8: vi = vi + 1UL; goto b4; diff --git a/cc4.om b/cc4.om @@ -70,93 +70,93 @@ lexer { COMMENT = "//" [[^\n]]* "\n"; } -//lalr { -// grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*; -// -// enum_item = IDENT (ASSIGN expr)?; -// enum_decl = ENUM LEFT_BRACE (enum_item COMMA)* enum_item? RIGHT_BRACE; -// -// member_decl = IDENT COLON type_decl SEMI; -// struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE; -// union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE; -// -// func_decl = FUNC IDENT func_type (SEMI | compound_stmt); -// -// type_decl = IDENT | BYTE | INT | VOID -// | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN; -// -// ptr_type = STAR type_decl; -// -// arg_decl = IDENT COLON type_decl; -// func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN -// (COLON type_decl)?; -// -// stmt = if_stmt | loop_stmt | break_stmt | continue_stmt -// | return_stmt | var_stmt | label_stmt | goto_stmt -// | assign_stmt | expr_stmt | empty_stmt | compound_stmt; -// -// elif_stmt = ELSE IF expr compound_stmt; -// else_stmt = ELSE compound_stmt; -// if_stmt = IF expr compound_stmt elif_stmt* else_stmt?; -// -// loop_stmt = LOOP compound_stmt; -// -// break_stmt = BREAK SEMI; -// -// continue_stmt = CONTINUE SEMI; -// -// return_stmt = RETURN expr? 
SEMI; -// -// var_stmt = VAR IDENT COLON type_decl SEMI; -// -// label_stmt = IDENT COLON; -// -// goto_stmt = GOTO IDENT SEMI; -// -// assign_stmt = unary_expr ASSIGN expr SEMI; -// -// expr_stmt = expr SEMI; -// -// empty_stmt = SEMI; -// -// compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE; -// -// expr = bool_expr; -// -// bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*; -// -// comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?; -// -// add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*; -// -// mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*; -// -// shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*; -// -// unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr; -// -// index_expr = LEFT_SQUARE expr RIGHT_SQUARE; -// call_expr = LEFT_PAREN (expr COMMA)* expr? RIGHT_PAREN; -// member_expr = DOT IDENT; -// cast_expr = AS type_decl; -// post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*; -// -// primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR -// | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN; -// -// sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN; -// -// lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET; -// lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*; -// lexer_alternative = lexer_suffix*; -// lexer_pattern = lexer_alternative (PIPE lexer_alternative)*; -// lexer_rule = IDENT ASSIGN lexer_pattern SEMI; -// lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE; -// -// lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT; -// lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*; -// lalr_alternative = lalr_suffix*; -// lalr_pattern = lalr_alternative (PIPE lalr_alternative)*; -// lalr_rule = IDENT ASSIGN lalr_pattern SEMI; -// lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE; -//} +lalr { + grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*; + + enum_item = IDENT (ASSIGN expr)?; + enum_decl = ENUM LEFT_BRACE (enum_item 
COMMA)* enum_item? RIGHT_BRACE; + + member_decl = IDENT COLON type_decl SEMI; + struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE; + union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE; + + func_decl = FUNC IDENT func_type (SEMI | compound_stmt); + + type_decl = IDENT | BYTE | INT | VOID + | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN; + + ptr_type = STAR type_decl; + + arg_decl = IDENT COLON type_decl; + func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN + (COLON type_decl)?; + + stmt = if_stmt | loop_stmt | break_stmt | continue_stmt + | return_stmt | var_stmt | label_stmt | goto_stmt + | assign_stmt | expr_stmt | empty_stmt | compound_stmt; + + elif_stmt = ELSE IF expr compound_stmt; + else_stmt = ELSE compound_stmt; + if_stmt = IF expr compound_stmt elif_stmt* else_stmt?; + + loop_stmt = LOOP compound_stmt; + + break_stmt = BREAK SEMI; + + continue_stmt = CONTINUE SEMI; + + return_stmt = RETURN expr? SEMI; + + var_stmt = VAR IDENT COLON type_decl SEMI; + + label_stmt = IDENT COLON; + + goto_stmt = GOTO IDENT SEMI; + + assign_stmt = unary_expr ASSIGN expr SEMI; + + expr_stmt = expr SEMI; + + empty_stmt = SEMI; + + compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE; + + expr = bool_expr; + + bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*; + + comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?; + + add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*; + + mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*; + + shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*; + + unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr; + + index_expr = LEFT_SQUARE expr RIGHT_SQUARE; + call_expr = LEFT_PAREN (expr COMMA)* expr? 
RIGHT_PAREN; + member_expr = DOT IDENT; + cast_expr = AS type_decl; + post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*; + + primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR + | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN; + + sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN; + + lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET; + lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*; + lexer_alternative = lexer_suffix*; + lexer_pattern = lexer_alternative (PIPE lexer_alternative)*; + lexer_rule = IDENT ASSIGN lexer_pattern SEMI; + lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE; + + lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT; + lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*; + lalr_alternative = lalr_suffix*; + lalr_pattern = lalr_alternative (PIPE lalr_alternative)*; + lalr_rule = IDENT ASSIGN lalr_pattern SEMI; + lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE; +} diff --git a/dump.om b/dump.om @@ -1,31 +0,0 @@ -func main(argc: int, argv: **byte, envp: **byte) { - var fd: int; - var f: *file; - var out: *file; - var peg: *peg; - var a: alloc; - var len: int; - var src: *byte; - var node: *peg_node; - setup_alloc(&a); - - fd = open(argv[1], 0, 0); - if fd < 0 { - die("open failed"); - } - - f = fopen(fd, &a); - src = freadall(f, &len); - fclose(f); - - peg = peg_new(argv[1], src, len, &a); - node = peg_parse(peg, P_sp); - peg_free(peg); - - out = fopen(1, &a); - peg_show(peg, out, node); - fputc(out, '\n'); - - fflush(out); - fclose(out); -} diff --git a/lexer.om b/lexer.om @@ -1249,3 +1249,203 @@ func lexer_compile_get_tag_name(c: *compiler, pn: *peg_node) { define_ir_func(c, ic, func_type); } + +func get_tag_table(): *int; +func get_link_table(): *int; +func get_tag_name(tag: int): *byte; + +struct lex { + a: *alloc; + + num_states: int; + tag: *int; + link: *int; + tag_to_str: (func(t:int):*byte); + + fd: int; + eof: int; + goteof: int; + + buf: *byte; + start: int; + end: int; 
+ fill: int; + cap: int; + + match: int; + start_lineno: int; + start_colno: int; + end_lineno: int; + end_colno: int; +} + +func setup_lex(a: *alloc): *lex { + var l: *lex; + var i: int; + l = alloc(a, sizeof(*l)) as *lex; + l.a = a; + l.tag = get_tag_table(); + l.link = get_link_table(); + l.tag_to_str = get_tag_name; + i = 0; + loop { + if l.tag[i] == -1 { + break; + } + i = i + 1; + } + l.num_states = i; + l.fd = 0; + l.eof = 1; + l.goteof = 1; + l.buf = nil; + l.start = 0; + l.end = 0; + l.fill = 0; + l.cap = 0; + l.match = -1; + l.start_lineno = 1; + l.start_colno = 1; + l.end_lineno = 1; + l.end_colno = 1; + return l; +} + +func open_lex(l: *lex, fd: int) { + l.fd = fd; + l.eof = 0; + l.goteof = 0; + + l.start = 0; + l.end = 0; + l.fill = 0; + + l.match = -1; + + l.start_lineno = 1; + l.start_colno = 1; + + l.end_lineno = 1; + l.end_colno = 1; +} + +func gettok(l: *lex): int { + var state: int; + var ch: int; + var tag: int; + var ptr: int; + var lineno: int; + var colno: int; + var ret: int; + var tmp: *byte; + + loop { + ptr = l.end; + lineno = l.end_lineno; + colno = l.end_colno; + + l.start = ptr; + l.start_lineno = lineno; + l.start_colno = colno; + + l.match = -1; + l.end_lineno = lineno; + l.end_colno = colno; + + state = 0; + loop { + if ptr == l.fill { + if l.goteof { + if l.start == l.fill { + l.eof = 1; + } + break; + } + + if l.fill - l.start >= (l.cap >> 1) { + l.cap = l.cap * 2 + 256; + tmp = alloc(l.a, l.cap); + memcpy(tmp, &l.buf[l.start], l.fill - l.start); + free(l.a, l.buf); + l.buf = tmp; + ptr = ptr - l.start; + l.end = l.end - l.start; + l.fill = l.fill - l.start; + l.start = 0; + } else if l.fill >= (l.cap >> 1) { + memcpy(l.buf, &l.buf[l.start], l.fill - l.start); + ptr = ptr - l.start; + l.end = l.end - l.start; + l.fill = l.fill - l.start; + l.start = 0; + } + + ret = read(l.fd, &l.buf[l.fill], l.cap - l.fill); + if ret < 0 { + die("read failed"); + } + + if ret == 0 { + l.goteof = 1; + break; + } + + l.fill = l.fill + ret; + } + + 
ch = l.buf[ptr] as int; + ptr = ptr + 1; + + if ch == '\n' { + lineno = lineno + 1; + colno = 1; + } else { + colno = colno + 1; + } + + state = l.link[state * 256 + ch]; + if state == -1 { + break; + } + + tag = l.tag[state]; + if tag { + l.match = tag; + l.end = ptr; + l.end_lineno = lineno; + l.end_colno = colno; + } + } + + if l.match == SPACE || l.match == COMMENT { + continue; + } + + return l.match; + } +} + +func dolex(a: *alloc, name: *byte) { + var l: *lex; + var fd: int; + var tok: int; + l = setup_lex(a); + fd = open(name, 0, 0); + if fd < 0 { + die("open failed"); + } + open_lex(l, fd); + loop { + tok = gettok(l); + if tok == -1 { + break; + } + fputs(nil, l.tag_to_str(tok)); + fputs(nil, "\t'"); + fputb(nil, &l.buf[l.start], l.end - l.start); + fputs(nil, "'\n"); + } + if !l.eof { + die("problem"); + } +} diff --git a/lexlib.om b/lexlib.om @@ -1,197 +0,0 @@ -func get_tag_table(): *int; -func get_link_table(): *int; -func get_tag_name(tag: int): *byte; - -struct lex { - a: *alloc; - - num_states: int; - tag: *int; - link: *int; - - fd: int; - eof: int; - goteof: int; - - buf: *byte; - start: int; - end: int; - fill: int; - cap: int; - - match: int; - start_lineno: int; - start_colno: int; - end_lineno: int; - end_colno: int; -} - -func setup_lex(a: *alloc): *lex { - var l: *lex; - var i: int; - l = alloc(a, sizeof(*l)) as *lex; - l.a = a; - l.tag = get_tag_table(); - l.link = get_link_table(); - i = 0; - loop { - if l.tag[i] == -1 { - break; - } - i = i + 1; - } - l.num_states = i; - l.fd = 0; - l.eof = 1; - l.goteof = 1; - l.buf = nil; - l.start = 0; - l.end = 0; - l.fill = 0; - l.cap = 0; - l.match = -1; - l.start_lineno = 1; - l.start_colno = 1; - l.end_lineno = 1; - l.end_colno = 1; - return l; -} - -func open_lex(l: *lex, fd: int) { - l.fd = fd; - l.eof = 0; - l.goteof = 0; - - l.start = 0; - l.end = 0; - l.fill = 0; - - l.match = -1; - - l.start_lineno = 1; - l.start_colno = 1; - - l.end_lineno = 1; - l.end_colno = 1; -} - -func gettok(l: *lex): 
int { - var state: int; - var ch: int; - var tag: int; - var ptr: int; - var lineno: int; - var colno: int; - var ret: int; - var tmp: *byte; - - ptr = l.end; - lineno = l.end_lineno; - colno = l.end_colno; - - l.start = ptr; - l.start_lineno = lineno; - l.start_colno = colno; - - l.match = -1; - l.end_lineno = lineno; - l.end_colno = colno; - - state = 0; - loop { - if ptr == l.fill { - if l.goteof { - if l.start == l.fill { - l.eof = 1; - } - break; - } - - if l.fill - l.start >= (l.cap >> 1) { - l.cap = l.cap * 2 + 256; - tmp = alloc(l.a, l.cap); - memcpy(tmp, &l.buf[l.start], l.fill - l.start); - free(l.a, l.buf); - l.buf = tmp; - ptr = ptr - l.start; - l.end = l.end - l.start; - l.fill = l.fill - l.start; - l.start = 0; - } else if l.fill >= (l.cap >> 1) { - memcpy(l.buf, &l.buf[l.start], l.fill - l.start); - ptr = ptr - l.start; - l.end = l.end - l.start; - l.fill = l.fill - l.start; - l.start = 0; - } - - ret = read(l.fd, &l.buf[l.fill], l.cap - l.fill); - if ret < 0 { - die("read failed"); - } - - if ret == 0 { - l.goteof = 1; - break; - } - - l.fill = l.fill + ret; - } - - ch = l.buf[ptr] as int; - ptr = ptr + 1; - - if ch == '\n' { - lineno = lineno + 1; - colno = 1; - } else { - colno = colno + 1; - } - - state = l.link[state * 256 + ch]; - if state == -1 { - break; - } - - tag = l.tag[state]; - if tag { - l.match = tag; - l.end = ptr; - l.end_lineno = lineno; - l.end_colno = colno; - } - } - - return l.match; -} - -func dolex(a: *alloc, name: *byte) { - var l: *lex; - var fd: int; - var tok: int; - l = setup_lex(a); - fd = open(name, 0, 0); - if fd < 0 { - die("open failed"); - } - open_lex(l, fd); - loop { - tok = gettok(l); - if tok == -1 { - break; - } - if tok == SPACE { - continue; - } - if tok == COMMENT { - continue; - } - fputs(nil, get_tag_name(tok)); - fputs(nil, "\t'"); - fputb(nil, &l.buf[l.start], l.end - l.start); - fputs(nil, "'\n"); - } - if !l.eof { - die("problem"); - } -}