os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 1dde52de096e9a61c09328e7965a7638f8bba18d
parent 9748cf0b895c37ac1fcc9adb74e9bbe278d533cb
Author: erai <erai@omiltem.net>
Date:   Sat, 29 Mar 2025 01:01:28 +0000

add lexer sublanguage peg

Diffstat:
Mbootstrap.sh | 2+-
Mcc0.c | 120++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Mcc3.om | 4++--
Acc4.om | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alalr.om | 3+++
Alexer.om | 3+++
Mparse2.om | 11+++++++++++
7 files changed, 251 insertions(+), 53 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh @@ -2,7 +2,7 @@ BOOTSTRAP="cc0.c" LIBS="bufio.om lib.om alloc.om syscall.om" -SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om" +SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om" # Build the bootstrap compiler from c [ cc0 -nt cc0.c ] || gcc -O1 -g -std=c89 ${BOOTSTRAP} -o cc0 diff --git a/cc0.c b/cc0.c @@ -246,11 +246,11 @@ u zpeg_P_lex(); u zpeg_P_lexer_alternative(); u zpeg_P_lexer_charset(); u zpeg_P_lexer_dot(); +u zpeg_P_lexer_grammar(); u zpeg_P_lexer_op(); u zpeg_P_lexer_pattern(); u zpeg_P_lexer_primary(); u zpeg_P_lexer_rule(); -u zpeg_P_lexer_spec(); u zpeg_P_lexer_suffix(); u zpeg_P_loop(); u zpeg_P_loop_stmt(); @@ -639,7 +639,7 @@ b240: if (vtag != 120UL) goto b242; b242: if (vtag != 121UL) goto b244; return (u)"P_lexer_rule"; b244: if (vtag != 122UL) goto b246; - return (u)"P_lexer_spec"; + return (u)"P_lexer_grammar"; b246: return 0UL; } u z_start(u vargc, u vargv, u venvp) { @@ -28318,7 +28318,7 @@ b26: zchoice(vc); if (v7 == 0UL) goto b30; goto b9; b30: zchoice(vc); - v8 = zpeg_P_lexer_spec(vc); + v8 = zpeg_P_lexer_grammar(vc); if (v8 == 0UL) goto b34; goto b9; b34: zfail(vc); @@ -28781,6 +28781,40 @@ u zpeg_P_lexer_dot(u vc) { b1: zfail(vc); return 0UL; } +u zpeg_P_lexer_grammar(u vc) { + u v1 = 0; + u v2 = 0; + u v3 = 0; + u v4 = 0; + u v5 = 0; + u v6 = 0; + u v7 = 0; + u v8 = 0; + zenter(vc, 122UL); + v1 = zliteral(vc, (u)"lexer"); + if (v1 == 0UL) goto b1; + v2 = zpeg_P_sp(vc); + if (v2 == 0UL) goto b1; + v3 = zliteral(vc, (u)"{"); + if (v3 == 0UL) goto b1; + v4 = zpeg_P_sp(vc); + if (v4 == 0UL) goto b1; + v5 = zpeg_P_lexer_rule(vc); + if (v5 == 0UL) goto b1; +b13: zchoice(vc); + v6 = zpeg_P_lexer_rule(vc); + if (v6 == 0UL) goto b14; + zcommit(vc); + goto b13; +b14: v7 = zliteral(vc, (u)"}"); + if (v7 == 0UL) goto b1; + v8 = zpeg_P_sp(vc); + if (v8 == 0UL) goto b1; + zleave(vc, 122UL); + return 1UL; +b1: zfail(vc); + return 0UL; +} u zpeg_P_lexer_op(u vc) { u v1 = 0; u v2 = 0; @@ -28903,40 +28937,6 @@ u zpeg_P_lexer_rule(u vc) { b1: zfail(vc); return 0UL; } -u zpeg_P_lexer_spec(u vc) { - u v1 = 0; - u v2 = 0; - u v3 = 0; - u v4 = 0; - u v5 = 0; - u v6 = 0; - u v7 = 0; - u v8 = 0; - zenter(vc, 122UL); - v1 = zliteral(vc, (u)"lexer"); - if (v1 == 0UL) goto b1; - v2 = zpeg_P_sp(vc); - if (v2 == 0UL) goto b1; - v3 = zliteral(vc, (u)"{"); - if (v3 == 0UL) goto b1; - v4 = zpeg_P_sp(vc); - if (v4 == 0UL) goto b1; - v5 = zpeg_P_lexer_rule(vc); - if (v5 == 0UL) goto b1; -b13: zchoice(vc); - v6 = zpeg_P_lexer_rule(vc); - if (v6 == 0UL) goto b14; - zcommit(vc); - goto b13; -b14: v7 = zliteral(vc, (u)"}"); - if (v7 == 0UL) goto b1; - v8 = zpeg_P_sp(vc); - if (v8 == 0UL) goto b1; - zleave(vc, 122UL); - return 1UL; -b1: zfail(vc); - return 0UL; -} u zpeg_P_lexer_suffix(u vc) { u v1 = 0; u v2 = 0; @@ -31320,6 +31320,12 @@ u zreconstruct(u vc, u vpn) { u v41 = 0; u v42 = 0; u v43 = 0; + u v44 = 0; + u v45 = 0; + u v46 = 0; + u v47 = 0; + u v48 = 0; + u v49 = 0; v6 = (u)zassert; v7 = (u)(*(u*)(vpn + 0UL) == 0UL); v8 = (u)"grammar"; @@ -31338,17 +31344,17 @@ b6: if (*(u*)(vpn + 0UL) != 2UL) goto b12; v13 = vpn; v14 = ((u(*)())v11)(v12, v13); vn = v14; -b10: v35 = (u)zmknode1; - v36 = vc; - v37 = 14UL; - v38 = vn; - v39 = ((u(*)())v35)(v36, v37, v38); - vp = v39; - v40 = (u)zcopypos; - v41 = vp; - v42 = vpn; - v43 = ((u(*)())v40)(v41, v42); - v43; +b10: v41 = (u)zmknode1; + v42 = vc; + v43 = 14UL; + v44 = vn; + v45 = ((u(*)())v41)(v42, v43, v44); + vp = v45; + v46 = (u)zcopypos; + v47 = vp; + v48 = vpn; + v49 = ((u(*)())v46)(v47, v48); + v49; *(u*)vlink = vp; vlink = vp + 16UL; vpn = *(u*)(vpn + 8UL); @@ -31383,10 +31389,24 @@ b21: if (*(u*)(vpn + 0UL) != 93UL) goto b24; v31; vpn = *(u*)(vpn + 8UL); goto b2; -b24: v32 = (u)zdie; - v33 = (u)"invalid decl"; +b24: if (*(u*)(vpn + 0UL) != 122UL) goto b27; + v32 = (u)zdie; + v33 = (u)"lexer"; v34 = ((u(*)())v32)(v33); v34; + vpn = *(u*)(vpn + 8UL); + goto b2; +b27: if (*(u*)(vpn + 0UL) != 113UL) goto b30; + v35 = (u)zdie; + v36 = (u)"lalr"; + v37 = ((u(*)())v35)(v36); + v37; + vpn = *(u*)(vpn + 8UL); + goto b2; +b30: v38 = (u)zdie; + v39 = (u)"invalid decl"; + v40 = ((u(*)())v38)(v39); + v40; goto b10; b8: v10 = 1UL; goto b9; diff --git a/cc3.om b/cc3.om @@ -1,5 +1,5 @@ peg_grammar { - grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lalr_grammar / lexer_spec)* !.; + grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lalr_grammar / lexer_grammar)* !.; enum_item = ident sp ("=" sp expr)?; enum_decl = enum sp "{" sp (enum_item ("," sp enum_item)*)? ("," sp)? "}" sp; @@ -165,5 +165,5 @@ peg_grammar { lexer_alternative = lexer_suffix*; lexer_pattern = lexer_alternative ("|" sp lexer_alternative)*; lexer_rule = ident sp "=" sp lexer_pattern ";" sp; - lexer_spec = "lexer" sp "{" sp lexer_rule+ "}" sp; + lexer_grammar = "lexer" sp "{" sp lexer_rule+ "}" sp; } diff --git a/cc4.om b/cc4.om @@ -0,0 +1,161 @@ +lexer { + LEFT_BRACE = "{"; + RIGHT_BRACE = "}"; + + LEFT_SQUARE = "["; + RIGHT_SQUARE = "]"; + + LEFT_PAREN = "("; + RIGHT_PAREN = ")"; + + DOT = "."; + ASSIGN = "="; + SEMI = ";"; + COLON = ":"; + QMARK = "?"; + STAR = "*"; + AMP = "&"; + COMMA = ","; + BANG = "!"; + PLUS = "+"; + MINUS = "-"; + NOT = "~"; + SLASH = "/"; + MOD = "%"; + PIPE = "|"; + XOR = "^"; + + AND_THEN = "&&"; + OR_ELSE = "||"; + + LE = "<="; + GE = ">="; + LT = "<"; + GT = ">"; + EQ = "=="; + NE = "!="; + + LEFT_SHIFT = "<<"; + RIGHT_SHIFT = ">>"; + + RETURN = "return"; + BREAK = "break"; + SIZEOF = "sizeof"; + IF = "if"; + ELSE = "else"; + LOOP = "loop"; + CONTINUE = "continue"; + GOTO = "goto"; + VAR = "var"; + ENUM = "enum"; + STRUCT = "struct"; + UNION = "union"; + BYTE = "byte"; + INT = "int"; + VOID = "void"; + FUNC = "func"; + AS = "as"; + NIL = "nil"; + LEX = "lexer"; + LALR = "lalr"; + + IDENT = [[a-zA-Z_]][[a-zA-Z0-9_]]*; + HEXIDECIMAL = "0x" [[0-9a-fA-F]] ([[0-9a-fA-F_]]* [[0-9a-fA-F]])?; + DECIMAL = [[0-9]] ([[0-9_]]* [[0-9]])?; + STRING = "\"" ("\\" . | [[^\\\x22]])* "\""; + CHAR = "'" ("\\" . | [[^\\\x27]])* "'"; + CHARSET = "[[" ([[^\]\\]]|"\\".)* "]]"; + + SPACE = ([[ \r\n\t]] | "//" [[^\n]]*)*; +} + +//lalr { +// grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*; +// +// enum_item = IDENT (ASSIGN expr)?; +// enum_decl = ENUM LEFT_BRACE (enum_item COMMA)* enum_item? RIGHT_BRACE; +// +// member_decl = IDENT COLON type_decl SEMI; +// struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE; +// union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE; +// +// func_decl = FUNC IDENT func_type (SEMI | compound_stmt); +// +// type_decl = IDENT | BYTE | INT | VOID +// | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN; +// +// ptr_type = STAR type_decl; +// +// arg_decl = IDENT COLON type_decl; +// func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN +// (COLON type_decl)?; +// +// stmt = if_stmt | loop_stmt | break_stmt | continue_stmt +// | return_stmt | var_stmt | label_stmt | goto_stmt +// | assign_stmt | expr_stmt | empty_stmt | compound_stmt; +// +// elif_stmt = ELSE IF expr compound_stmt; +// else_stmt = ELSE compound_stmt; +// if_stmt = IF expr compound_stmt elif_stmt* else_stmt?; +// +// loop_stmt = LOOP compound_stmt; +// +// break_stmt = BREAK SEMI; +// +// continue_stmt = CONTINUE SEMI; +// +// return_stmt = RETURN expr? SEMI; +// +// var_stmt = VAR IDENT COLON type_decl SEMI; +// +// label_stmt = IDENT COLON; +// +// goto_stmt = GOTO IDENT SEMI; +// +// assign_stmt = unary_expr ASSIGN expr SEMI; +// +// expr_stmt = expr SEMI; +// +// empty_stmt = SEMI; +// +// compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE; +// +// expr = bool_expr; +// +// bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*; +// +// comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?; +// +// add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*; +// +// mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*; +// +// shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*; +// +// unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr; +// +// index_expr = LEFT_SQUARE expr RIGHT_SQUARE; +// call_expr = LEFT_PAREN (expr COMMA)* expr? RIGHT_PAREN; +// member_expr = DOT IDENT; +// cast_expr = AS type_decl; +// post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*; +// +// primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR +// | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN; +// +// sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN; +// +// lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET; +// lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*; +// lexer_alternative = lexer_suffix*; +// lexer_pattern = lexer_alternative (PIPE lexer_alternative)*; +// lexer_rule = IDENT ASSIGN lexer_pattern SEMI; +// lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE; +// +// lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT; +// lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*; +// lalr_alternative = lalr_suffix*; +// lalr_pattern = lalr_alternative (PIPE lalr_alternative)*; +// lalr_rule = IDENT ASSIGN lalr_pattern SEMI; +// lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE; +//} diff --git a/lalr.om b/lalr.om @@ -0,0 +1,3 @@ +func lalr_compiler(c: *compiler, pn: *peg_node, err: *file) { + //die("lalr"); +} diff --git a/lexer.om b/lexer.om @@ -0,0 +1,3 @@ +func lexer_compile(c: *compiler, pn: *peg_node, err: *file) { + die("lexer"); +} diff --git a/parse2.om b/parse2.om @@ -2,6 +2,7 @@ struct parser { a: *alloc; p: *peg; pc: *peg_compiler; + c: *compiler; err: *file; } @@ -18,6 +19,8 @@ func setup_parser(cc: *compiler, err: *file): *parser { c.err = err; + c.c = cc; + return c; } @@ -83,6 +86,14 @@ func reconstruct(c: *parser, pn: *peg_node): *node { peg_compile(c.pc, pn, c.err); pn = pn.next; continue; + } else if pn.tag == P_lexer_grammar { + lexer_compile(c.c, pn, c.err); + pn = pn.next; + continue; + } else if pn.tag == P_lalr_grammar { + lalr_compiler(c.c, pn, c.err); + pn = pn.next; + continue; } else { die("invalid decl"); }