os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 0f03c71bbd3d17b1eeea308b1570010dd5209707
parent d7aac3db4d5ba66c2eb3e1738f35a074ebeaf749
Author: erai <erai@omiltem.net>
Date:   Sat, 14 Sep 2024 15:30:21 -0400

use peg.peg to parse peg

Diffstat:
M build.sh | 2 +-
A parsepeg.c | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M peg.c | 470 -------------------------------------------------------------------------------
3 files changed, 370 insertions(+), 471 deletions(-)

diff --git a/build.sh b/build.sh @@ -3,7 +3,7 @@ LIBS="bufio.c lib.c alloc.c syscall.c" CRYPTO="ed25519.c sha512.c sha256.c chacha20.c poly1305.c" CC="cc1.c type.c parse1.c lex1.c as.c" -PEG="peg.c peglib.c" +PEG="peg.c peglib.c parsepeg.c" BOOT="pxe.asm" SSHD="chacha20.c poly1305.c sha256.c sha512.c ed25519.c sshd.c" KERNEL="kernel.c" diff --git a/parsepeg.c b/parsepeg.c @@ -0,0 +1,369 @@ +enum { + P_grammar, + P_rule, + P_pattern, + P_alternative, + P_lookop, + P_lookahead, + P_countop, + P_suffix, + P_primary, + P_any, + P_literal, + P_class, + P_call, + P_identifier, + P_sp, +} + +tag_to_str(tag: int): *byte { + if tag == P_grammar { return "grammar"; } + if tag == P_rule { return "rule"; } + if tag == P_pattern { return "pattern"; } + if tag == P_alternative { return "alternative"; } + if tag == P_lookop { return "lookop"; } + if tag == P_lookahead { return "lookahead"; } + if tag == P_countop { return "countop"; } + if tag == P_suffix { return "suffix"; } + if tag == P_primary { return "primary"; } + if tag == P_any { return "any"; } + if tag == P_literal { return "literal"; } + if tag == P_class { return "class"; } + if tag == P_call { return "call"; } + if tag == P_identifier { return "identifier"; } + if tag == P_sp { return "sp"; } + die("invalid tag"); +} + +p_grammar(c: *peg): int { + var ok: int; + enter(c); + ok = p_sp(c); + if ok { + ok = p_rule(c); + if ok { + loop { + choice(c); + ok = p_rule(c); + if !ok { ok = 1; break; } + commit(c); + } + } + } + if ok { + choice(c); + ok = any(c); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + } + if ok { leave(c, P_grammar); } else { fail(c); } + return ok; +} + +p_rule(c: *peg): int { + var ok: int; + enter(c); + ok = p_identifier(c); + if ok { + ok = p_sp(c); + } + if ok { + ok = literal(c, "<-"); + } + if ok { + ok = p_sp(c); + } + if ok { + ok = p_pattern(c); + } + if ok { leave(c, P_rule); } else { fail(c); } + return ok; +} + +p_pattern(c: *peg): int { + var ok: int; + enter(c); + ok = 
p_alternative(c); + if ok { + loop { + choice(c); + ok = literal(c, "/"); + if ok { + choice(c); + ok = literal(c, "/"); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + } + if ok { + ok = p_sp(c); + } + if ok { + ok = p_alternative(c); + } + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { leave(c, P_pattern); } else { fail(c); } + return ok; +} + +p_alternative(c: *peg): int { + var ok: int; + enter(c); + ok = p_lookahead(c); + if ok { + loop { + choice(c); + ok = p_lookahead(c); + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { leave(c, P_alternative); } else { fail(c); } + return ok; +} + +p_lookop(c: *peg): int { + var ok: int; + enter(c); + ok = charset(c, "!&"); + if ok { leave(c, P_lookop); } else { fail(c); } + return ok; +} + +p_lookahead(c: *peg): int { + var ok: int; + enter(c); + choice(c); + ok = p_lookop(c); + if ok { + ok = p_sp(c); + } + if ok { commit(c); } else { ok = 1; } + if ok { + ok = p_suffix(c); + } + if ok { leave(c, P_lookahead); } else { fail(c); } + return ok; +} + +p_countop(c: *peg): int { + var ok: int; + enter(c); + ok = charset(c, "*+?"); + if ok { leave(c, P_countop); } else { fail(c); } + return ok; +} + +p_suffix(c: *peg): int { + var ok: int; + enter(c); + ok = p_primary(c); + if ok { + loop { + choice(c); + ok = p_countop(c); + if ok { + ok = p_sp(c); + } + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { leave(c, P_suffix); } else { fail(c); } + return ok; +} + +p_primary(c: *peg): int { + var ok: int; + enter(c); + choice(c); + ok = literal(c, "("); + if ok { + ok = p_sp(c); + } + if ok { + ok = p_pattern(c); + } + if ok { + ok = literal(c, ")"); + } + if !ok { choice(c); + ok = p_any(c); + } + if !ok { choice(c); + ok = p_any(c); + } + if !ok { choice(c); + ok = p_literal(c); + } + if !ok { choice(c); + ok = p_literal(c); + } + if !ok { choice(c); + ok = p_class(c); + } + if !ok { choice(c); + ok = p_class(c); + } + if !ok { choice(c); + ok = p_call(c); + } + if !ok { choice(c); + ok = 
p_call(c); + } + if ok { commit(c); } else { fail(c); } + if ok { + ok = p_sp(c); + } + if ok { leave(c, P_primary); } else { fail(c); } + return ok; +} + +p_any(c: *peg): int { + var ok: int; + enter(c); + ok = literal(c, "."); + if ok { leave(c, P_any); } else { fail(c); } + return ok; +} + +p_literal(c: *peg): int { + var ok: int; + enter(c); + ok = charset(c, "'"); + if ok { + loop { + choice(c); + choice(c); + ok = charset(c, "'"); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + if ok { + ok = any(c); + } + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { + ok = charset(c, "'"); + } + if ok { leave(c, P_literal); } else { fail(c); } + return ok; +} + +p_class(c: *peg): int { + var ok: int; + enter(c); + ok = literal(c, "["); + if ok { + loop { + choice(c); + choice(c); + ok = literal(c, "]"); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + if ok { + choice(c); + ok = any(c); + if ok { + ok = literal(c, "-"); + } + if ok { + ok = any(c); + } + if !ok { choice(c); + ok = any(c); + } + if !ok { choice(c); + ok = any(c); + } + if ok { commit(c); } else { fail(c); } + } + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { + ok = literal(c, "]"); + } + if ok { leave(c, P_class); } else { fail(c); } + return ok; +} + +p_call(c: *peg): int { + var ok: int; + enter(c); + ok = p_identifier(c); + if ok { + choice(c); + ok = p_sp(c); + if ok { + ok = literal(c, "<-"); + } + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + } + if ok { leave(c, P_call); } else { fail(c); } + return ok; +} + +p_identifier(c: *peg): int { + var ok: int; + enter(c); + ok = charset(c, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"); + if ok { + loop { + choice(c); + ok = charset(c, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"); + if !ok { ok = 1; break; } + commit(c); + } + } + if ok { leave(c, P_identifier); } else { fail(c); } + return ok; +} + +p_sp(c: *peg): int { + var ok: int; + enter(c); + loop { + choice(c); + 
choice(c); + ok = charset(c, "\x09\x0a\x0d\x20"); + if !ok { choice(c); + ok = literal(c, "//"); + if ok { + loop { + choice(c); + choice(c); + ok = charset(c, "\x0a\x0d"); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + if ok { + ok = any(c); + } + if !ok { ok = 1; break; } + commit(c); + } + } + } + if !ok { choice(c); + ok = literal(c, "//"); + if ok { + loop { + choice(c); + choice(c); + ok = charset(c, "\x0a\x0d"); + if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } + if ok { + ok = any(c); + } + if !ok { ok = 1; break; } + commit(c); + } + } + } + if ok { commit(c); } else { fail(c); } + if !ok { ok = 1; break; } + commit(c); + } + if ok { leave(c, P_sp); } else { fail(c); } + return ok; +} diff --git a/peg.c b/peg.c @@ -6,476 +6,6 @@ struct compiler { } enum { - FAIL = 0, - OK = 1, -} - -enum { - P_grammar = 1, - P_rule, - P_pattern, - P_alternative, - P_lookop, - P_lookahead, - P_countop, - P_suffix, - P_primary, - P_any, - P_literal, - P_class, - P_call, - P_identifier, - P_sp, -} - -tag_to_str(tag: int): *byte { - if tag == P_grammar { return "P_grammar"; } - if tag == P_rule { return "P_rule"; } - if tag == P_pattern { return "P_pattern"; } - if tag == P_alternative { return "P_alternative"; } - if tag == P_lookop { return "P_lookop"; } - if tag == P_lookahead { return "P_lookahead"; } - if tag == P_countop { return "P_countop"; } - if tag == P_suffix { return "P_suffix"; } - if tag == P_primary { return "P_primary"; } - if tag == P_any { return "P_any"; } - if tag == P_literal { return "P_literal"; } - if tag == P_class { return "P_class"; } - if tag == P_call { return "P_call"; } - if tag == P_identifier { return "P_identifier"; } - if tag == P_sp { return "P_sp"; } - return "(invalid)"; -} - -// grammar <- sp rule+ !. 
-p_grammar(c: *peg): int { - enter(c); - - if !p_sp(c) { - fail(c); - return FAIL; - } - - if !p_rule(c) { - fail(c); - return FAIL; - } - - loop { - choice(c); - if !p_rule(c) { - break; - } - commit(c); - } - - choice(c); - if any(c) { - fail(c); - fail(c); - return FAIL; - } - - leave(c, P_grammar); - return OK; -} - -// rule <- identifier sp '<-' sp pattern -p_rule(c: *peg): int { - enter(c); - - if !p_identifier(c) { - fail(c); - return FAIL; - } - - if !p_sp(c) { - fail(c); - return FAIL; - } - - if !literal(c, "<-") { - fail(c); - return FAIL; - } - - if !p_sp(c) { - fail(c); - return FAIL; - } - - if !p_pattern(c) { - fail(c); - return FAIL; - } - - leave(c, P_rule); - return OK; -} - -// pattern <- alternative ( '/' !'/' sp alternative )* -p_pattern(c: *peg): int { - enter(c); - - if !p_alternative(c) { - fail(c); - return FAIL; - } - - loop { - choice(c); - - if !literal(c, "/") { - break; - } - - choice(c); - if literal(c, "/") { - fail(c); - fail(c); - return FAIL; - } - - if !p_sp(c) { - break; - } - - if !p_alternative(c) { - break; - } - - commit(c); - } - - leave(c, P_pattern); - return OK; -} - -// lookop <- [!&] -p_lookop(c: *peg): int { - enter(c); - - if !charset(c, "!&") { - fail(c); - return FAIL; - } - - leave(c, P_lookop); - return OK; -} - -// alternative <- lookahead+ -p_alternative(c: *peg): int { - enter(c); - - if !p_lookahead(c) { - fail(c); - return FAIL; - } - - loop { - choice(c); - - if !p_lookahead(c) { - break; - } - - commit(c); - } - - leave(c, P_alternative); - return OK; -} - -// lookahead <- (lookop sp)? suffix -p_lookahead(c: *peg): int { - enter(c); - - choice(c); - if p_lookop(c) && p_sp(c) { - commit(c); - } - - if !p_suffix(c) { - fail(c); - return FAIL; - } - - leave(c, P_lookahead); - return OK; -} - -// countop <- [*+?] 
-p_countop(c: *peg): int { - enter(c); - - if !charset(c, "*+?") { - fail(c); - return FAIL; - } - - leave(c, P_countop); - return OK; -} - -// suffix <- primary (countop sp)* -p_suffix(c: *peg): int { - enter(c); - - if !p_primary(c) { - fail(c); - return FAIL; - } - - loop { - choice(c); - - if !p_countop(c) { - break; - } - - if !p_sp(c) { - break; - } - - commit(c); - } - - leave(c, P_suffix); - return OK; -} - -// primary <- ( '(' sp pattern ')' / any / literal / class / call ) sp -p_primary(c: *peg): int { - enter(c); - - loop { - choice(c); - if literal(c, "(") && p_sp(c) && p_pattern(c) && literal(c, ")") { - commit(c); - break; - } - - choice(c); - if p_any(c) { - commit(c); - break; - } - - choice(c); - if p_literal(c) { - commit(c); - break; - } - - choice(c); - if p_class(c) { - commit(c); - break; - } - - choice(c); - if p_call(c) { - commit(c); - break; - } - - fail(c); - fail(c); - return FAIL; - } - - if !p_sp(c) { - fail(c); - return FAIL; - } - - leave(c, P_primary); - return OK; -} - -// any <- '.' -p_any(c: *peg): int { - enter(c); - - if !literal(c, ".") { - fail(c); - return FAIL; - } - - leave(c, P_any); - return OK; -} - -// literal <- ['] ( !['] . )* ['] -p_literal(c: *peg): int { - enter(c); - - if !literal(c, "'") { - fail(c); - return FAIL; - } - - loop { - choice(c); - - choice(c); - if literal(c, "'") { - fail(c); - fail(c); - break; - } - - if !any(c) { - break; - } - - commit(c); - } - - if !literal(c, "'") { - fail(c); - return FAIL; - } - - leave(c, P_literal); - return OK; -} - -// charclass <- '[' ( !']' ( . '-' . / . 
) )* ']' -p_class(c: *peg): int { - enter(c); - - if !literal(c, "[") { - fail(c); - return FAIL; - } - - loop { - choice(c); - - choice(c); - if literal(c, "]") { - fail(c); - fail(c); - break; - } - - if !any(c) { - break; - } - - choice(c); - if literal(c, "-") && any(c) { - commit(c); - } - - commit(c); - } - - if !literal(c, "]") { - fail(c); - return FAIL; - } - - leave(c, P_class); - return OK; -} - -// call <- identifier !'<-' -p_call(c: *peg): int { - enter(c); - - if !p_identifier(c) { - fail(c); - return FAIL; - } - - choice(c); - if p_sp(c) && literal(c, "<-") { - fail(c); - fail(c); - fail(c); - return FAIL; - } - - leave(c, P_call); - return OK; -} - -// identifier <- [a-zA-Z0-9_]+ -p_identifier(c: *peg): int { - var chars: *byte; - enter(c); - - chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; - - if !charset(c, chars) { - fail(c); - return FAIL; - } - - loop { - choice(c); - - if !charset(c, chars) { - break; - } - - commit(c); - } - - leave(c, P_identifier); - return OK; -} - -// sp <- ( [ \t\r\n] / '//' ( ![\r\n] . )* )* -p_sp(c: *peg): int { - enter(c); - - loop { - choice(c); - - choice(c); - if charset(c, " \t\r\n") { - commit(c); - commit(c); - continue; - } - - choice(c); - if literal(c, "//") { - loop { - choice(c); - - choice(c); - if charset(c, "\r\n") { - fail(c); - fail(c); - break; - } - - if !any(c) { - break; - } - - commit(c); - } - - commit(c); - commit(c); - continue; - } - - fail(c); - break; - } - - leave(c, P_sp); - return OK; -} - -enum { LOOK_NORMAL, LOOK_NOT, LOOK_AND,