commit 261f1f71bbb3e08d51f7ca810509c1cd69b22bca
parent da2f6fae3ef648e090af395192959965d30d55db
Author: erai <erai@omiltem.net>
Date: Tue, 8 Apr 2025 03:09:26 +0000
define enums for lexer patterns
Diffstat:
4 files changed, 83 insertions(+), 40 deletions(-)
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -2,13 +2,13 @@
BOOTSTRAP="cc0.c"
LIBS="bufio.om lib.om alloc.om syscall.om"
-SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om"
+SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om"
# Build the bootstrap compiler from c
[ cc0 -nt cc0.c ] || gcc -O1 -g -std=c89 ${BOOTSTRAP} -o cc0
./cc0 ${LIBS} ${SOURCES} cc3.om -o cc1 -n cc1.lines -G cc1.call
# Double check the bootstrap and self hosting compiler have the same output
-./cc1 ${LIBS} ${SOURCES} cc3.om cc4.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call
+./cc1 ${LIBS} ${SOURCES} cc3.om -C cc2.c -o cc2 -n cc2.lines -G cc2.call
cmp cc1 cc2 || echo cc mismatch
cmp cc0.c cc2.c || echo bootstrap mismatch
diff --git a/cc0.c b/cc0.c
@@ -23298,7 +23298,7 @@ b4: goto b2;
u zlexer_compile(u vc, u vpn, u verr) {
u va = 0;
u vb = 0;
- u v5 = 0;
+ u vd = 0;
u v6 = 0;
u v7 = 0;
u v8 = 0;
@@ -23312,30 +23312,31 @@ u zlexer_compile(u vc, u vpn, u verr) {
u v16 = 0;
u v17 = 0;
u v18 = 0;
+ u v19 = 0;
vpn = *(u*)(vpn + 16UL);
b1: if (!vpn) goto b7;
- v5 = 0UL;
-b8: if (!v5) goto b5;
- v15 = (u)zlexer_explode;
- v16 = vc;
- v17 = va;
- v18 = ((u(*)())v15)(v16, v17);
- v18;
+ v6 = 0UL;
+b8: if (!v6) goto b5;
+ v16 = (u)zlexer_explode;
+ v17 = vc;
+ v18 = va;
+ v19 = ((u(*)())v16)(v17, v18);
+ vd = v19;
return 0UL;
-b5: v6 = (u)zlexer_compile_rule;
- v7 = vc;
- v8 = vpn;
- v9 = ((u(*)())v6)(v7, v8);
- vb = v9;
- v10 = (u)znfa_alt;
- v11 = vc;
- v12 = va;
- v13 = vb;
- v14 = ((u(*)())v10)(v11, v12, v13);
- va = v14;
+b5: v7 = (u)zlexer_compile_rule;
+ v8 = vc;
+ v9 = vpn;
+ v10 = ((u(*)())v7)(v8, v9);
+ vb = v10;
+ v11 = (u)znfa_alt;
+ v12 = vc;
+ v13 = va;
+ v14 = vb;
+ v15 = ((u(*)())v11)(v12, v13, v14);
+ va = v15;
vpn = *(u*)(vpn + 8UL);
goto b1;
-b7: v5 = 1UL;
+b7: v6 = 1UL;
goto b8;
}
u zlexer_compile_alternative(u vc, u vpn) {
@@ -23661,7 +23662,7 @@ u zlexer_compile_rule(u vc, u vpn) {
u vpat = 0;
u va = 0;
u vb = 0;
- u v6 = 0;
+ u vtag = 0;
u v7 = 0;
u v8 = 0;
u v9 = 0;
@@ -23673,25 +23674,58 @@ u zlexer_compile_rule(u vc, u vpn) {
u v15 = 0;
u v16 = 0;
u v17 = 0;
+ u v18 = 0;
+ u v19 = 0;
+ u v20 = 0;
+ u v21 = 0;
+ u v22 = 0;
+ u v23 = 0;
+ u v24 = 0;
+ u v25 = 0;
+ u v26 = 0;
+ u v27 = 0;
+ u v28 = 0;
+ u v29 = 0;
+ u v30 = 0;
+ u v31 = 0;
+ u v32 = 0;
vident = *(u*)(vpn + 16UL);
vpat = *(u*)(vident + 8UL);
- v6 = (u)zlexer_compile_pattern;
- v7 = vc;
- v8 = vpat;
- v9 = ((u(*)())v6)(v7, v8);
- va = v9;
- v10 = (u)znfa_empty;
- v11 = vc;
- v12 = ((u(*)())v10)(v11);
- vb = v12;
+ v7 = (u)zalloc;
+ v8 = *(u*)(vc + 0UL);
+ v9 = *(u*)(vident + 32UL) + 1UL;
+ v10 = ((u(*)())v7)(v8, v9);
+ vtag = v10;
+ v11 = (u)zmemcpy;
+ v12 = vtag;
+ v13 = *(u*)(vident + 24UL);
+ v14 = *(u*)(vident + 32UL);
+ v15 = ((u(*)())v11)(v12, v13, v14);
+ v15;
+ *(b*)(vtag + *(u*)(vident + 32UL) * 1UL) = 0UL;
+ v16 = (u)zlexer_compile_pattern;
+ v17 = vc;
+ v18 = vpat;
+ v19 = ((u(*)())v16)(v17, v18);
+ va = v19;
+ v20 = (u)znfa_empty;
+ v21 = vc;
+ v22 = ((u(*)())v20)(v21);
+ vb = v22;
*(u*)(vc + 112UL) = *(u*)(vc + 112UL) + 1UL;
*(u*)(vb + 8UL) = *(u*)(vc + 112UL);
- v13 = (u)znfa_concat;
- v14 = vc;
- v15 = va;
- v16 = vb;
- v17 = ((u(*)())v13)(v14, v15, v16);
- return v17;
+ v23 = (u)zdefine_enum_tag;
+ v24 = vc;
+ v25 = vtag;
+ v26 = *(u*)(vb + 8UL);
+ v27 = ((u(*)())v23)(v24, v25, v26);
+ v27;
+ v28 = (u)znfa_concat;
+ v29 = vc;
+ v30 = va;
+ v31 = vb;
+ v32 = ((u(*)())v28)(v29, v30, v31);
+ return v32;
}
u zlexer_compile_str(u vc, u vpn) {
u vi = 0;
diff --git a/cc4.om b/cc4.om
@@ -66,7 +66,8 @@ lexer {
CHAR = "'" ("\\" . | [[^\\\x27]])* "'";
CHARSET = "[[" ([[^\]\\]]|"\\".)* "]]";
- SPACE = ([[ \r\n\t]] | "//" [[^\n]]*)+;
+ SPACE = [[ \r\n\t]];
+ COMMENT = "//";
}
//lalr {
diff --git a/lexer.om b/lexer.om
@@ -193,6 +193,7 @@ func nfa_any(c: *compiler): *nfa {
func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
var a: *nfa;
var b: *nfa;
+ var d: *dfa;
pn = pn.child;
loop {
@@ -206,7 +207,7 @@ func lexer_compile(c: *compiler, pn: *peg_node, err: *file) {
pn = pn.next;
}
- lexer_explode(c, a);
+ d = lexer_explode(c, a);
}
func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa {
@@ -214,15 +215,22 @@ func lexer_compile_rule(c: *compiler, pn: *peg_node): *nfa {
var pat: *peg_node;
var a: *nfa;
var b: *nfa;
+ var tag: *byte;
ident = pn.child;
pat = ident.next;
+ tag = alloc(c.a, ident.len + 1);
+ memcpy(tag, ident.str, ident.len);
+ tag[ident.len] = 0 as byte;
+
a = lexer_compile_pattern(c, pat);
b = nfa_empty(c);
c.lex_id = c.lex_id + 1;
b.tag = c.lex_id;
+ define_enum_tag(c, tag, b.tag);
+
return nfa_concat(c, a, b);
}