commit 0f03c71bbd3d17b1eeea308b1570010dd5209707
parent d7aac3db4d5ba66c2eb3e1738f35a074ebeaf749
Author: erai <erai@omiltem.net>
Date: Sat, 14 Sep 2024 15:30:21 -0400
use peg.peg to parse peg
Diffstat:
M | build.sh | | | 2 | +- |
A | parsepeg.c | | | 369 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | peg.c | | | 470 | ------------------------------------------------------------------------------- |
3 files changed, 370 insertions(+), 471 deletions(-)
diff --git a/build.sh b/build.sh
@@ -3,7 +3,7 @@
LIBS="bufio.c lib.c alloc.c syscall.c"
CRYPTO="ed25519.c sha512.c sha256.c chacha20.c poly1305.c"
CC="cc1.c type.c parse1.c lex1.c as.c"
-PEG="peg.c peglib.c"
+PEG="peg.c peglib.c parsepeg.c"
BOOT="pxe.asm"
SSHD="chacha20.c poly1305.c sha256.c sha512.c ed25519.c sshd.c"
KERNEL="kernel.c"
diff --git a/parsepeg.c b/parsepeg.c
@@ -0,0 +1,369 @@
+// Parse-tree node tags, one per grammar rule. Each p_<rule> parser in this
+// file passes its own tag to leave() when the rule matches, and tag_to_str()
+// turns a tag back into the rule name. No explicit values are given here.
+enum {
+ P_grammar,
+ P_rule,
+ P_pattern,
+ P_alternative,
+ P_lookop,
+ P_lookahead,
+ P_countop,
+ P_suffix,
+ P_primary,
+ P_any,
+ P_literal,
+ P_class,
+ P_call,
+ P_identifier,
+ P_sp,
+}
+
+// Return the grammar rule name for a P_* tag, without the "P_" prefix
+// (P_grammar -> "grammar"). A tag that matches none of the enumerators
+// falls through every comparison and reaches die("invalid tag").
+tag_to_str(tag: int): *byte {
+ if tag == P_grammar { return "grammar"; }
+ if tag == P_rule { return "rule"; }
+ if tag == P_pattern { return "pattern"; }
+ if tag == P_alternative { return "alternative"; }
+ if tag == P_lookop { return "lookop"; }
+ if tag == P_lookahead { return "lookahead"; }
+ if tag == P_countop { return "countop"; }
+ if tag == P_suffix { return "suffix"; }
+ if tag == P_primary { return "primary"; }
+ if tag == P_any { return "any"; }
+ if tag == P_literal { return "literal"; }
+ if tag == P_class { return "class"; }
+ if tag == P_call { return "call"; }
+ if tag == P_identifier { return "identifier"; }
+ if tag == P_sp { return "sp"; }
+ die("invalid tag");
+}
+
+// grammar <- sp rule+ !.
+//
+// Returns 1 when the whole input is a sequence of rules, 0 otherwise.
+p_grammar(c: *peg): int {
+	var matched: int;
+	enter(c);
+
+	matched = p_sp(c);
+	if !matched {
+		fail(c);
+		return matched;
+	}
+
+	// rule+ : one mandatory rule ...
+	matched = p_rule(c);
+	if !matched {
+		fail(c);
+		return matched;
+	}
+
+	// ... followed by as many more as will match.
+	loop {
+		choice(c);
+		if !p_rule(c) {
+			break;
+		}
+		commit(c);
+	}
+
+	// !. : no input may remain. When a character is still available the
+	// rule fails; two fail() calls belong to the lookahead and the third
+	// is the rule's own failure exit.
+	choice(c);
+	if any(c) {
+		fail(c);
+		fail(c);
+		fail(c);
+		return 0;
+	}
+
+	leave(c, P_grammar);
+	return 1;
+}
+
+// rule <- identifier sp '<-' sp pattern
+//
+// Returns 1 on a match and 0 otherwise. The elements are tried strictly
+// left to right and evaluation stops at the first one that fails.
+p_rule(c: *peg): int {
+	enter(c);
+
+	if p_identifier(c) && p_sp(c) && literal(c, "<-") && p_sp(c) && p_pattern(c) {
+		leave(c, P_rule);
+		return 1;
+	}
+
+	fail(c);
+	return 0;
+}
+
+// pattern <- alternative ( '/' !'/' sp alternative )*
+//
+// Returns 1 once the first alternative matched (the repetition can never
+// make the rule fail), 0 otherwise.
+p_pattern(c: *peg): int {
+	var head: int;
+	enter(c);
+
+	head = p_alternative(c);
+	if !head {
+		fail(c);
+		return head;
+	}
+
+	loop {
+		choice(c);
+
+		if !literal(c, "/") {
+			break;
+		}
+
+		// !'/' : the slash must not be followed by a second slash.
+		// When it is, drop the lookahead and this iteration, then stop.
+		choice(c);
+		if literal(c, "/") {
+			fail(c);
+			fail(c);
+			break;
+		}
+
+		if !p_sp(c) {
+			break;
+		}
+
+		if !p_alternative(c) {
+			break;
+		}
+
+		commit(c);
+	}
+
+	leave(c, P_pattern);
+	return 1;
+}
+
+// alternative <- lookahead+
+//
+// Returns 1 when at least one lookahead matched, 0 otherwise.
+p_alternative(c: *peg): int {
+	var first: int;
+	enter(c);
+
+	first = p_lookahead(c);
+	if !first {
+		fail(c);
+		return first;
+	}
+
+	// Greedily take every further lookahead.
+	loop {
+		choice(c);
+		if !p_lookahead(c) {
+			break;
+		}
+		commit(c);
+	}
+
+	leave(c, P_alternative);
+	return 1;
+}
+
+// lookop <- [!&]
+//
+// Returns whatever charset() returned: nonzero on a match, 0 otherwise.
+p_lookop(c: *peg): int {
+	var hit: int;
+	enter(c);
+
+	hit = charset(c, "!&");
+	if !hit {
+		fail(c);
+		return hit;
+	}
+
+	leave(c, P_lookop);
+	return hit;
+}
+
+// lookahead <- (lookop sp)? suffix
+//
+// Returns the result of p_suffix(): nonzero on a match, 0 otherwise.
+p_lookahead(c: *peg): int {
+	var matched: int;
+	enter(c);
+
+	// Optional prefix: keep it when both parts match, otherwise carry on
+	// without it.
+	choice(c);
+	if p_lookop(c) && p_sp(c) {
+		commit(c);
+	}
+
+	matched = p_suffix(c);
+	if matched {
+		leave(c, P_lookahead);
+	} else {
+		fail(c);
+	}
+	return matched;
+}
+
+// countop <- [*+?]
+//
+// Returns whatever charset() returned: nonzero on a match, 0 otherwise.
+p_countop(c: *peg): int {
+	var hit: int;
+	enter(c);
+
+	hit = charset(c, "*+?");
+	if !hit {
+		fail(c);
+		return hit;
+	}
+
+	leave(c, P_countop);
+	return hit;
+}
+
+// suffix <- primary (countop sp)*
+//
+// Returns 1 once the primary matched, 0 otherwise.
+p_suffix(c: *peg): int {
+	var base: int;
+	enter(c);
+
+	base = p_primary(c);
+	if !base {
+		fail(c);
+		return base;
+	}
+
+	// Zero or more repetition operators, each followed by spacing.
+	loop {
+		choice(c);
+
+		if !p_countop(c) {
+			break;
+		}
+
+		if !p_sp(c) {
+			break;
+		}
+
+		commit(c);
+	}
+
+	leave(c, P_suffix);
+	return 1;
+}
+
+// primary <- ( '(' sp pattern ')' / any / literal / class / call ) sp
+//
+// Returns nonzero when one of the five alternatives matched and the trailing
+// spacing was consumed, 0 otherwise.
+//
+// The alternatives are tried in order and each one is tried exactly once.
+// Trying the same alternative a second time right after it failed would only
+// redo the same failed match, so no alternative is repeated.
+p_primary(c: *peg): int {
+	var ok: int;
+	enter(c);
+
+	// Alternative 1: parenthesised sub-pattern.
+	choice(c);
+	ok = literal(c, "(");
+	if ok {
+		ok = p_sp(c);
+	}
+	if ok {
+		ok = p_pattern(c);
+	}
+	if ok {
+		ok = literal(c, ")");
+	}
+
+	// Alternative 2: '.'
+	if !ok {
+		choice(c);
+		ok = p_any(c);
+	}
+
+	// Alternative 3: quoted literal.
+	if !ok {
+		choice(c);
+		ok = p_literal(c);
+	}
+
+	// Alternative 4: character class.
+	if !ok {
+		choice(c);
+		ok = p_class(c);
+	}
+
+	// Alternative 5: reference to another rule.
+	if !ok {
+		choice(c);
+		ok = p_call(c);
+	}
+
+	// Close the alternation: keep the winner, or give up on the rule.
+	if ok { commit(c); } else { fail(c); }
+
+	if ok {
+		ok = p_sp(c);
+	}
+
+	if ok { leave(c, P_primary); } else { fail(c); }
+	return ok;
+}
+
+// any <- '.'
+//
+// Returns whatever literal() returned: nonzero on a match, 0 otherwise.
+p_any(c: *peg): int {
+	var hit: int;
+	enter(c);
+
+	hit = literal(c, ".");
+	if !hit {
+		fail(c);
+		return hit;
+	}
+
+	leave(c, P_any);
+	return hit;
+}
+
+// literal <- ['] ( !['] . )* [']
+//
+// Returns the result of matching the closing quote: nonzero on a match,
+// 0 otherwise.
+p_literal(c: *peg): int {
+	var quote: int;
+	enter(c);
+
+	quote = charset(c, "'");
+	if !quote {
+		fail(c);
+		return quote;
+	}
+
+	// Body: any character that is not a quote.
+	loop {
+		choice(c);
+
+		// !['] : seeing a quote ends the body; drop the lookahead and
+		// this iteration.
+		choice(c);
+		if charset(c, "'") {
+			fail(c);
+			fail(c);
+			break;
+		}
+
+		if !any(c) {
+			break;
+		}
+
+		commit(c);
+	}
+
+	quote = charset(c, "'");
+	if !quote {
+		fail(c);
+		return quote;
+	}
+
+	leave(c, P_literal);
+	return quote;
+}
+
+// class <- '[' ( !']' ( . '-' . / . ) )* ']'
+//
+// Returns the result of matching the closing bracket: nonzero on a match,
+// 0 otherwise.
+//
+// Inside the brackets each item is either a range "x-y" or a single
+// character. The single-character fallback is tried exactly once; trying it
+// again immediately after it failed would only redo the same failed match.
+p_class(c: *peg): int {
+	var ok: int;
+	enter(c);
+
+	ok = literal(c, "[");
+	if ok {
+		loop {
+			choice(c);
+
+			// !']' : a closing bracket ends the item list.
+			choice(c);
+			ok = literal(c, "]");
+			if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }
+
+			if ok {
+				// First choice: a range, . '-' .
+				choice(c);
+				ok = any(c);
+				if ok {
+					ok = literal(c, "-");
+				}
+				if ok {
+					ok = any(c);
+				}
+
+				// Second choice: a single character.
+				if !ok {
+					choice(c);
+					ok = any(c);
+				}
+
+				if ok { commit(c); } else { fail(c); }
+			}
+
+			if !ok { ok = 1; break; }
+			commit(c);
+		}
+	}
+
+	if ok {
+		ok = literal(c, "]");
+	}
+
+	if ok { leave(c, P_class); } else { fail(c); }
+	return ok;
+}
+
+// call <- identifier !(sp '<-')
+//
+// Returns 1 when an identifier is found that is not the left-hand side of a
+// rule definition, 0 otherwise.
+p_call(c: *peg): int {
+	var name: int;
+	enter(c);
+
+	name = p_identifier(c);
+	if !name {
+		fail(c);
+		return name;
+	}
+
+	// Negative lookahead: an arrow after the identifier means this is the
+	// start of the next rule. Two fail() calls belong to the lookahead and
+	// the third is the rule's own failure exit.
+	choice(c);
+	if p_sp(c) && literal(c, "<-") {
+		fail(c);
+		fail(c);
+		fail(c);
+		return 0;
+	}
+
+	leave(c, P_call);
+	return 1;
+}
+
+// identifier <- [0-9A-Z_a-z]+
+//
+// Returns 1 when at least one identifier character matched, 0 otherwise.
+p_identifier(c: *peg): int {
+	var chars: *byte;
+	var first: int;
+	enter(c);
+
+	// The same character set is used for the first and every later character.
+	chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
+
+	first = charset(c, chars);
+	if !first {
+		fail(c);
+		return first;
+	}
+
+	loop {
+		choice(c);
+		if !charset(c, chars) {
+			break;
+		}
+		commit(c);
+	}
+
+	leave(c, P_identifier);
+	return 1;
+}
+
+// sp <- ( [ \t\r\n] / '//' ( ![\r\n] . )* )*
+//
+// Consumes any run of whitespace characters and "//" line comments. Because
+// the whole rule is a repetition it matches the empty string too, so in
+// practice it returns 1.
+//
+// Each iteration tries the whitespace character first and the comment
+// second. The comment alternative is tried exactly once per iteration;
+// trying it again right after it failed would only redo the same failed
+// match.
+p_sp(c: *peg): int {
+	var ok: int;
+	enter(c);
+
+	loop {
+		choice(c);
+
+		// First choice: one of tab, LF, CR, space.
+		choice(c);
+		ok = charset(c, "\x09\x0a\x0d\x20");
+
+		// Second choice: "//" followed by everything up to the end of line.
+		if !ok {
+			choice(c);
+			ok = literal(c, "//");
+			if ok {
+				loop {
+					choice(c);
+
+					// ![\r\n] : stop in front of a line terminator.
+					choice(c);
+					ok = charset(c, "\x0a\x0d");
+					if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }
+
+					if ok {
+						ok = any(c);
+					}
+
+					if !ok { ok = 1; break; }
+					commit(c);
+				}
+			}
+		}
+
+		// Close the alternation, then the repetition step.
+		if ok { commit(c); } else { fail(c); }
+		if !ok { ok = 1; break; }
+		commit(c);
+	}
+
+	if ok { leave(c, P_sp); } else { fail(c); }
+	return ok;
+}
diff --git a/peg.c b/peg.c
@@ -6,476 +6,6 @@ struct compiler {
}
enum {
- FAIL = 0,
- OK = 1,
-}
-
-enum {
- P_grammar = 1,
- P_rule,
- P_pattern,
- P_alternative,
- P_lookop,
- P_lookahead,
- P_countop,
- P_suffix,
- P_primary,
- P_any,
- P_literal,
- P_class,
- P_call,
- P_identifier,
- P_sp,
-}
-
-tag_to_str(tag: int): *byte {
- if tag == P_grammar { return "P_grammar"; }
- if tag == P_rule { return "P_rule"; }
- if tag == P_pattern { return "P_pattern"; }
- if tag == P_alternative { return "P_alternative"; }
- if tag == P_lookop { return "P_lookop"; }
- if tag == P_lookahead { return "P_lookahead"; }
- if tag == P_countop { return "P_countop"; }
- if tag == P_suffix { return "P_suffix"; }
- if tag == P_primary { return "P_primary"; }
- if tag == P_any { return "P_any"; }
- if tag == P_literal { return "P_literal"; }
- if tag == P_class { return "P_class"; }
- if tag == P_call { return "P_call"; }
- if tag == P_identifier { return "P_identifier"; }
- if tag == P_sp { return "P_sp"; }
- return "(invalid)";
-}
-
-// grammar <- sp rule+ !.
-p_grammar(c: *peg): int {
- enter(c);
-
- if !p_sp(c) {
- fail(c);
- return FAIL;
- }
-
- if !p_rule(c) {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
- if !p_rule(c) {
- break;
- }
- commit(c);
- }
-
- choice(c);
- if any(c) {
- fail(c);
- fail(c);
- return FAIL;
- }
-
- leave(c, P_grammar);
- return OK;
-}
-
-// rule <- identifier sp '<-' sp pattern
-p_rule(c: *peg): int {
- enter(c);
-
- if !p_identifier(c) {
- fail(c);
- return FAIL;
- }
-
- if !p_sp(c) {
- fail(c);
- return FAIL;
- }
-
- if !literal(c, "<-") {
- fail(c);
- return FAIL;
- }
-
- if !p_sp(c) {
- fail(c);
- return FAIL;
- }
-
- if !p_pattern(c) {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_rule);
- return OK;
-}
-
-// pattern <- alternative ( '/' !'/' sp alternative )*
-p_pattern(c: *peg): int {
- enter(c);
-
- if !p_alternative(c) {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- if !literal(c, "/") {
- break;
- }
-
- choice(c);
- if literal(c, "/") {
- fail(c);
- fail(c);
- return FAIL;
- }
-
- if !p_sp(c) {
- break;
- }
-
- if !p_alternative(c) {
- break;
- }
-
- commit(c);
- }
-
- leave(c, P_pattern);
- return OK;
-}
-
-// lookop <- [!&]
-p_lookop(c: *peg): int {
- enter(c);
-
- if !charset(c, "!&") {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_lookop);
- return OK;
-}
-
-// alternative <- lookahead+
-p_alternative(c: *peg): int {
- enter(c);
-
- if !p_lookahead(c) {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- if !p_lookahead(c) {
- break;
- }
-
- commit(c);
- }
-
- leave(c, P_alternative);
- return OK;
-}
-
-// lookahead <- (lookop sp)? suffix
-p_lookahead(c: *peg): int {
- enter(c);
-
- choice(c);
- if p_lookop(c) && p_sp(c) {
- commit(c);
- }
-
- if !p_suffix(c) {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_lookahead);
- return OK;
-}
-
-// countop <- [*+?]
-p_countop(c: *peg): int {
- enter(c);
-
- if !charset(c, "*+?") {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_countop);
- return OK;
-}
-
-// suffix <- primary (countop sp)*
-p_suffix(c: *peg): int {
- enter(c);
-
- if !p_primary(c) {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- if !p_countop(c) {
- break;
- }
-
- if !p_sp(c) {
- break;
- }
-
- commit(c);
- }
-
- leave(c, P_suffix);
- return OK;
-}
-
-// primary <- ( '(' sp pattern ')' / any / literal / class / call ) sp
-p_primary(c: *peg): int {
- enter(c);
-
- loop {
- choice(c);
- if literal(c, "(") && p_sp(c) && p_pattern(c) && literal(c, ")") {
- commit(c);
- break;
- }
-
- choice(c);
- if p_any(c) {
- commit(c);
- break;
- }
-
- choice(c);
- if p_literal(c) {
- commit(c);
- break;
- }
-
- choice(c);
- if p_class(c) {
- commit(c);
- break;
- }
-
- choice(c);
- if p_call(c) {
- commit(c);
- break;
- }
-
- fail(c);
- fail(c);
- return FAIL;
- }
-
- if !p_sp(c) {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_primary);
- return OK;
-}
-
-// any <- '.'
-p_any(c: *peg): int {
- enter(c);
-
- if !literal(c, ".") {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_any);
- return OK;
-}
-
-// literal <- ['] ( !['] . )* [']
-p_literal(c: *peg): int {
- enter(c);
-
- if !literal(c, "'") {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- choice(c);
- if literal(c, "'") {
- fail(c);
- fail(c);
- break;
- }
-
- if !any(c) {
- break;
- }
-
- commit(c);
- }
-
- if !literal(c, "'") {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_literal);
- return OK;
-}
-
-// charclass <- '[' ( !']' ( . '-' . / . ) )* ']'
-p_class(c: *peg): int {
- enter(c);
-
- if !literal(c, "[") {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- choice(c);
- if literal(c, "]") {
- fail(c);
- fail(c);
- break;
- }
-
- if !any(c) {
- break;
- }
-
- choice(c);
- if literal(c, "-") && any(c) {
- commit(c);
- }
-
- commit(c);
- }
-
- if !literal(c, "]") {
- fail(c);
- return FAIL;
- }
-
- leave(c, P_class);
- return OK;
-}
-
-// call <- identifier !'<-'
-p_call(c: *peg): int {
- enter(c);
-
- if !p_identifier(c) {
- fail(c);
- return FAIL;
- }
-
- choice(c);
- if p_sp(c) && literal(c, "<-") {
- fail(c);
- fail(c);
- fail(c);
- return FAIL;
- }
-
- leave(c, P_call);
- return OK;
-}
-
-// identifier <- [a-zA-Z0-9_]+
-p_identifier(c: *peg): int {
- var chars: *byte;
- enter(c);
-
- chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
-
- if !charset(c, chars) {
- fail(c);
- return FAIL;
- }
-
- loop {
- choice(c);
-
- if !charset(c, chars) {
- break;
- }
-
- commit(c);
- }
-
- leave(c, P_identifier);
- return OK;
-}
-
-// sp <- ( [ \t\r\n] / '//' ( ![\r\n] . )* )*
-p_sp(c: *peg): int {
- enter(c);
-
- loop {
- choice(c);
-
- choice(c);
- if charset(c, " \t\r\n") {
- commit(c);
- commit(c);
- continue;
- }
-
- choice(c);
- if literal(c, "//") {
- loop {
- choice(c);
-
- choice(c);
- if charset(c, "\r\n") {
- fail(c);
- fail(c);
- break;
- }
-
- if !any(c) {
- break;
- }
-
- commit(c);
- }
-
- commit(c);
- commit(c);
- continue;
- }
-
- fail(c);
- break;
- }
-
- leave(c, P_sp);
- return OK;
-}
-
-enum {
LOOK_NORMAL,
LOOK_NOT,
LOOK_AND,