commit 1dde52de096e9a61c09328e7965a7638f8bba18d
parent 9748cf0b895c37ac1fcc9adb74e9bbe278d533cb
Author: erai <erai@omiltem.net>
Date: Sat, 29 Mar 2025 01:01:28 +0000
add lexer sublanguage peg
Diffstat:
M | bootstrap.sh | | | 2 | +- |
M | cc0.c | | | 120 | ++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- |
M | cc3.om | | | 4 | ++-- |
A | cc4.om | | | 161 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | lalr.om | | | 3 | +++ |
A | lexer.om | | | 3 | +++ |
M | parse2.om | | | 11 | +++++++++++ |
7 files changed, 251 insertions(+), 53 deletions(-)
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -2,7 +2,7 @@
BOOTSTRAP="cc0.c"
LIBS="bufio.om lib.om alloc.om syscall.om"
-SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om"
+SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om"
# Build the bootstrap compiler from c
[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c89 ${BOOTSTRAP} -o cc0
diff --git a/cc0.c b/cc0.c
@@ -246,11 +246,11 @@ u zpeg_P_lex();
u zpeg_P_lexer_alternative();
u zpeg_P_lexer_charset();
u zpeg_P_lexer_dot();
+u zpeg_P_lexer_grammar();
u zpeg_P_lexer_op();
u zpeg_P_lexer_pattern();
u zpeg_P_lexer_primary();
u zpeg_P_lexer_rule();
-u zpeg_P_lexer_spec();
u zpeg_P_lexer_suffix();
u zpeg_P_loop();
u zpeg_P_loop_stmt();
@@ -639,7 +639,7 @@ b240: if (vtag != 120UL) goto b242;
b242: if (vtag != 121UL) goto b244;
return (u)"P_lexer_rule";
b244: if (vtag != 122UL) goto b246;
- return (u)"P_lexer_spec";
+ return (u)"P_lexer_grammar";
b246: return 0UL;
}
u z_start(u vargc, u vargv, u venvp) {
@@ -28318,7 +28318,7 @@ b26: zchoice(vc);
if (v7 == 0UL) goto b30;
goto b9;
b30: zchoice(vc);
- v8 = zpeg_P_lexer_spec(vc);
+ v8 = zpeg_P_lexer_grammar(vc);
if (v8 == 0UL) goto b34;
goto b9;
b34: zfail(vc);
@@ -28781,6 +28781,40 @@ u zpeg_P_lexer_dot(u vc) {
b1: zfail(vc);
return 0UL;
}
+/*
+ * Generated PEG matcher for the cc3.om rule
+ *   lexer_grammar = "lexer" sp "{" sp lexer_rule+ "}" sp;
+ * vc is the parser context handed to every z* helper.
+ * Returns 1UL when the whole sequence matched, 0UL (after zfail) otherwise.
+ * 122UL is the node tag that the tag-name table reports as "P_lexer_grammar".
+ */
+u zpeg_P_lexer_grammar(u vc) {
+ u v1 = 0;
+ u v2 = 0;
+ u v3 = 0;
+ u v4 = 0;
+ u v5 = 0;
+ u v6 = 0;
+ u v7 = 0;
+ u v8 = 0;
+ zenter(vc, 122UL);
+ v1 = zliteral(vc, (u)"lexer");
+ if (v1 == 0UL) goto b1;
+ v2 = zpeg_P_sp(vc);
+ if (v2 == 0UL) goto b1;
+ v3 = zliteral(vc, (u)"{");
+ if (v3 == 0UL) goto b1;
+ v4 = zpeg_P_sp(vc);
+ if (v4 == 0UL) goto b1;
+ /* lexer_rule+ : the first rule is mandatory ... */
+ v5 = zpeg_P_lexer_rule(vc);
+ if (v5 == 0UL) goto b1;
+ /* ... then zero or more further rules, each attempted under a fresh choice point. */
+ /* NOTE(review): presumably the failing callee's zfail unwinds to that choice point - confirm against zchoice/zfail. */
+b13: zchoice(vc);
+ v6 = zpeg_P_lexer_rule(vc);
+ if (v6 == 0UL) goto b14;
+ zcommit(vc);
+ goto b13;
+b14: v7 = zliteral(vc, (u)"}");
+ if (v7 == 0UL) goto b1;
+ v8 = zpeg_P_sp(vc);
+ if (v8 == 0UL) goto b1;
+ zleave(vc, 122UL);
+ return 1UL;
+ /* Shared failure exit for every mandatory element of the sequence. */
+b1: zfail(vc);
+ return 0UL;
+}
u zpeg_P_lexer_op(u vc) {
u v1 = 0;
u v2 = 0;
@@ -28903,40 +28937,6 @@ u zpeg_P_lexer_rule(u vc) {
b1: zfail(vc);
return 0UL;
}
-u zpeg_P_lexer_spec(u vc) {
- u v1 = 0;
- u v2 = 0;
- u v3 = 0;
- u v4 = 0;
- u v5 = 0;
- u v6 = 0;
- u v7 = 0;
- u v8 = 0;
- zenter(vc, 122UL);
- v1 = zliteral(vc, (u)"lexer");
- if (v1 == 0UL) goto b1;
- v2 = zpeg_P_sp(vc);
- if (v2 == 0UL) goto b1;
- v3 = zliteral(vc, (u)"{");
- if (v3 == 0UL) goto b1;
- v4 = zpeg_P_sp(vc);
- if (v4 == 0UL) goto b1;
- v5 = zpeg_P_lexer_rule(vc);
- if (v5 == 0UL) goto b1;
-b13: zchoice(vc);
- v6 = zpeg_P_lexer_rule(vc);
- if (v6 == 0UL) goto b14;
- zcommit(vc);
- goto b13;
-b14: v7 = zliteral(vc, (u)"}");
- if (v7 == 0UL) goto b1;
- v8 = zpeg_P_sp(vc);
- if (v8 == 0UL) goto b1;
- zleave(vc, 122UL);
- return 1UL;
-b1: zfail(vc);
- return 0UL;
-}
u zpeg_P_lexer_suffix(u vc) {
u v1 = 0;
u v2 = 0;
@@ -31320,6 +31320,12 @@ u zreconstruct(u vc, u vpn) {
u v41 = 0;
u v42 = 0;
u v43 = 0;
+ u v44 = 0;
+ u v45 = 0;
+ u v46 = 0;
+ u v47 = 0;
+ u v48 = 0;
+ u v49 = 0;
v6 = (u)zassert;
v7 = (u)(*(u*)(vpn + 0UL) == 0UL);
v8 = (u)"grammar";
@@ -31338,17 +31344,17 @@ b6: if (*(u*)(vpn + 0UL) != 2UL) goto b12;
v13 = vpn;
v14 = ((u(*)())v11)(v12, v13);
vn = v14;
-b10: v35 = (u)zmknode1;
- v36 = vc;
- v37 = 14UL;
- v38 = vn;
- v39 = ((u(*)())v35)(v36, v37, v38);
- vp = v39;
- v40 = (u)zcopypos;
- v41 = vp;
- v42 = vpn;
- v43 = ((u(*)())v40)(v41, v42);
- v43;
+b10: v41 = (u)zmknode1;
+ v42 = vc;
+ v43 = 14UL;
+ v44 = vn;
+ v45 = ((u(*)())v41)(v42, v43, v44);
+ vp = v45;
+ v46 = (u)zcopypos;
+ v47 = vp;
+ v48 = vpn;
+ v49 = ((u(*)())v46)(v47, v48);
+ v49;
*(u*)vlink = vp;
vlink = vp + 16UL;
vpn = *(u*)(vpn + 8UL);
@@ -31383,10 +31389,24 @@ b21: if (*(u*)(vpn + 0UL) != 93UL) goto b24;
v31;
vpn = *(u*)(vpn + 8UL);
goto b2;
-b24: v32 = (u)zdie;
- v33 = (u)"invalid decl";
+b24: if (*(u*)(vpn + 0UL) != 122UL) goto b27;
+ v32 = (u)zdie;
+ v33 = (u)"lexer";
v34 = ((u(*)())v32)(v33);
v34;
+ vpn = *(u*)(vpn + 8UL);
+ goto b2;
+b27: if (*(u*)(vpn + 0UL) != 113UL) goto b30;
+ v35 = (u)zdie;
+ v36 = (u)"lalr";
+ v37 = ((u(*)())v35)(v36);
+ v37;
+ vpn = *(u*)(vpn + 8UL);
+ goto b2;
+b30: v38 = (u)zdie;
+ v39 = (u)"invalid decl";
+ v40 = ((u(*)())v38)(v39);
+ v40;
goto b10;
b8: v10 = 1UL;
goto b9;
diff --git a/cc3.om b/cc3.om
@@ -1,5 +1,5 @@
peg_grammar {
- grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lalr_grammar / lexer_spec)* !.;
+ grammar = sp (enum_decl / struct_decl / union_decl / func_decl / peg_grammar / lalr_grammar / lexer_grammar)* !.;
enum_item = ident sp ("=" sp expr)?;
enum_decl = enum sp "{" sp (enum_item ("," sp enum_item)*)? ("," sp)? "}" sp;
@@ -165,5 +165,5 @@ peg_grammar {
lexer_alternative = lexer_suffix*;
lexer_pattern = lexer_alternative ("|" sp lexer_alternative)*;
lexer_rule = ident sp "=" sp lexer_pattern ";" sp;
- lexer_spec = "lexer" sp "{" sp lexer_rule+ "}" sp;
+ lexer_grammar = "lexer" sp "{" sp lexer_rule+ "}" sp;
}
diff --git a/cc4.om b/cc4.om
@@ -0,0 +1,161 @@
+// Token definitions for the language, written in the `lexer { ... }` sublanguage.
+// Each rule has the form NAME = pattern; the groups below are, in order:
+// brackets, single-character punctuation/operators, two-character operators,
+// keywords, literal/identifier classes, and whitespace.
+//
+// NOTE(review): several tokens share a prefix ("=" / "==", "<" / "<=" / "<<",
+// "&" / "&&", every keyword vs IDENT) and the longer literal is not always
+// declared first, so this presumably relies on longest-match tokenization.
+// lexer_compile is still a stub, so confirm once the lexer is implemented.
+// NOTE(review): HEXIDECIMAL looks like a misspelling of HEXADECIMAL; the
+// commented-out lalr `primary` rule below uses the same spelling, so rename
+// both together.
+// NOTE(review): DECIMAL and HEXIDECIMAL accept "_" only between digits, never
+// as the last character.
+// NOTE(review): CHAR accepts zero or more characters between the quotes.
+// TODO confirm that this is intended.
+// NOTE(review): SPACE is a top-level "*" repetition and can therefore match
+// the empty string; it also covers "//" line comments up to the newline.
+lexer {
+ LEFT_BRACE = "{";
+ RIGHT_BRACE = "}";
+
+ LEFT_SQUARE = "[";
+ RIGHT_SQUARE = "]";
+
+ LEFT_PAREN = "(";
+ RIGHT_PAREN = ")";
+
+ DOT = ".";
+ ASSIGN = "=";
+ SEMI = ";";
+ COLON = ":";
+ QMARK = "?";
+ STAR = "*";
+ AMP = "&";
+ COMMA = ",";
+ BANG = "!";
+ PLUS = "+";
+ MINUS = "-";
+ NOT = "~";
+ SLASH = "/";
+ MOD = "%";
+ PIPE = "|";
+ XOR = "^";
+
+ AND_THEN = "&&";
+ OR_ELSE = "||";
+
+ LE = "<=";
+ GE = ">=";
+ LT = "<";
+ GT = ">";
+ EQ = "==";
+ NE = "!=";
+
+ LEFT_SHIFT = "<<";
+ RIGHT_SHIFT = ">>";
+
+ RETURN = "return";
+ BREAK = "break";
+ SIZEOF = "sizeof";
+ IF = "if";
+ ELSE = "else";
+ LOOP = "loop";
+ CONTINUE = "continue";
+ GOTO = "goto";
+ VAR = "var";
+ ENUM = "enum";
+ STRUCT = "struct";
+ UNION = "union";
+ BYTE = "byte";
+ INT = "int";
+ VOID = "void";
+ FUNC = "func";
+ AS = "as";
+ NIL = "nil";
+ LEX = "lexer";
+ LALR = "lalr";
+
+ IDENT = [[a-zA-Z_]][[a-zA-Z0-9_]]*;
+ HEXIDECIMAL = "0x" [[0-9a-fA-F]] ([[0-9a-fA-F_]]* [[0-9a-fA-F]])?;
+ DECIMAL = [[0-9]] ([[0-9_]]* [[0-9]])?;
+ STRING = "\"" ("\\" . | [[^\\\x22]])* "\"";
+ CHAR = "'" ("\\" . | [[^\\\x27]])* "'";
+ CHARSET = "[[" ([[^\]\\]]|"\\".)* "]]";
+
+ SPACE = ([[ \r\n\t]] | "//" [[^\n]]*)*;
+}
+
+//lalr {
+// grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*;
+//
+// enum_item = IDENT (ASSIGN expr)?;
+// enum_decl = ENUM LEFT_BRACE (enum_item COMMA)* enum_item? RIGHT_BRACE;
+//
+// member_decl = IDENT COLON type_decl SEMI;
+// struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
+// union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
+//
+// func_decl = FUNC IDENT func_type (SEMI | compound_stmt);
+//
+// type_decl = IDENT | BYTE | INT | VOID
+// | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN;
+//
+// ptr_type = STAR type_decl;
+//
+// arg_decl = IDENT COLON type_decl;
+// func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN
+// (COLON type_decl)?;
+//
+// stmt = if_stmt | loop_stmt | break_stmt | continue_stmt
+// | return_stmt | var_stmt | label_stmt | goto_stmt
+// | assign_stmt | expr_stmt | empty_stmt | compound_stmt;
+//
+// elif_stmt = ELSE IF expr compound_stmt;
+// else_stmt = ELSE compound_stmt;
+// if_stmt = IF expr compound_stmt elif_stmt* else_stmt?;
+//
+// loop_stmt = LOOP compound_stmt;
+//
+// break_stmt = BREAK SEMI;
+//
+// continue_stmt = CONTINUE SEMI;
+//
+// return_stmt = RETURN expr? SEMI;
+//
+// var_stmt = VAR IDENT COLON type_decl SEMI;
+//
+// label_stmt = IDENT COLON;
+//
+// goto_stmt = GOTO IDENT SEMI;
+//
+// assign_stmt = unary_expr ASSIGN expr SEMI;
+//
+// expr_stmt = expr SEMI;
+//
+// empty_stmt = SEMI;
+//
+// compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE;
+//
+// expr = bool_expr;
+//
+// bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*;
+//
+// comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?;
+//
+// add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*;
+//
+// mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*;
+//
+// shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*;
+//
+// unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr;
+//
+// index_expr = LEFT_SQUARE expr RIGHT_SQUARE;
+// call_expr = LEFT_PAREN (expr COMMA)* expr? RIGHT_PAREN;
+// member_expr = DOT IDENT;
+// cast_expr = AS type_decl;
+// post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*;
+//
+// primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR
+// | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN;
+//
+// sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN;
+//
+// lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET;
+// lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*;
+// lexer_alternative = lexer_suffix*;
+// lexer_pattern = lexer_alternative (PIPE lexer_alternative)*;
+// lexer_rule = IDENT ASSIGN lexer_pattern SEMI;
+// lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE;
+//
+// lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT;
+// lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*;
+// lalr_alternative = lalr_suffix*;
+// lalr_pattern = lalr_alternative (PIPE lalr_alternative)*;
+// lalr_rule = IDENT ASSIGN lalr_pattern SEMI;
+// lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE;
+//}
diff --git a/lalr.om b/lalr.om
@@ -0,0 +1,3 @@
+// lalr_compiler: hook that reconstruct() in parse2.om calls for every parse
+// node tagged P_lalr_grammar. It is currently an empty stub: c, pn and err are
+// accepted but unused, so a lalr block is silently ignored.
+// NOTE(review): the lexer counterpart is named lexer_compile; this was
+// presumably meant to be lalr_compile. Renaming requires updating the call in
+// reconstruct() as well.
+func lalr_compiler(c: *compiler, pn: *peg_node, err: *file) {
+ // The die() call is disabled, so a lalr block does not stop compilation.
+ //die("lalr");
+}
diff --git a/lexer.om b/lexer.om
@@ -0,0 +1,3 @@
+// lexer_compile: hook that reconstruct() in parse2.om calls for every parse
+// node tagged P_lexer_grammar. It is not implemented yet: c, pn and err are
+// unused and the body unconditionally calls die("lexer").
+func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
+ die("lexer");
+}
diff --git a/parse2.om b/parse2.om
@@ -2,6 +2,7 @@ struct parser {
a: *alloc;
p: *peg;
pc: *peg_compiler;
+ c: *compiler;
err: *file;
}
@@ -18,6 +19,8 @@ func setup_parser(cc: *compiler, err: *file): *parser {
c.err = err;
+ c.c = cc;
+
return c;
}
@@ -83,6 +86,14 @@ func reconstruct(c: *parser, pn: *peg_node): *node {
peg_compile(c.pc, pn, c.err);
pn = pn.next;
continue;
+ } else if pn.tag == P_lexer_grammar {
+ lexer_compile(c.c, pn, c.err);
+ pn = pn.next;
+ continue;
+ } else if pn.tag == P_lalr_grammar {
+ lalr_compiler(c.c, pn, c.err);
+ pn = pn.next;
+ continue;
} else {
die("invalid decl");
}