commit e98815b06bc941a0b653656b6176c614e78fca0d
parent ea6a9e8b6d4947c067ce6355ec2893b386380025
Author: erai <erai@omiltem.net>
Date: Thu, 12 Sep 2024 12:47:50 -0400
Add peg parser recognizer
Diffstat:
M | bufio.c | | | 7 | +++++++ |
A | cc3.peg | | | 120 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | peg.c | | | 524 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | peg.peg | | | 15 | +++++++++++++++ |
M | syscall.c | | | 4 | ++++ |
5 files changed, 670 insertions(+), 0 deletions(-)
diff --git a/bufio.c b/bufio.c
@@ -153,3 +153,10 @@ fputs(f: *file, s: *byte) {
i = i + 1;
}
}
+
+// fseek: reposition f at byte offset off.
+// The r, w and eof fields are reset before seeking, so whatever was buffered
+// is dropped (nothing is flushed here).
+// NOTE(review): the result of lseek() is ignored, so a failed seek goes unnoticed.
+fseek(f: *file, off: int) {
+ f.r = 0;
+ f.w = 0;
+ f.eof = 0;
+ lseek(f.fd, off, 0); // whence 0: presumably SEEK_SET (absolute offset) -- confirm
+}
diff --git a/cc3.peg b/cc3.peg
@@ -0,0 +1,120 @@
+# Top level: declarations up to end of input.
+grammar <- sp (enum_decl / struct_decl / func_decl)* !.
+
+# ident only skips whitespace that follows it, so the separator needs its own
+# sp; without it "A, B" cannot match.
+enum_decl <- enum ident '{' sp (ident (',' sp ident)*)? (',' sp)? '}' sp
+
+struct_decl <- struct ident '{' sp (ident ':' sp type ';' sp)* '}' sp
+
+# A declaration ends in ';', a definition carries a body.
+func_decl <- ident func_type (';' / '{' sp stmt* '}' ) sp
+
+type <- ident
+ / byte
+ / int
+ / void
+ / '*' sp type
+ / '(' sp type ')' sp
+ / func func_type
+
+# Parameter list with optional trailing comma, then an optional return type.
+func_type <- '(' sp
+ ( ident ':' sp type (',' sp ident ':' sp type)* )?
+ ( ',' sp )?
+ ')' sp (':' sp type)?
+
+# Ordered choice: keyword statements first, assign_stmt before expr_stmt.
+stmt <- if_stmt
+ / loop_stmt
+ / break_stmt
+ / continue_stmt
+ / return_stmt
+ / var_stmt
+ / label_stmt
+ / goto_stmt
+ / assign_stmt
+ / expr_stmt
+ / empty_stmt
+
+# The else body takes stmt* like the other two bodies (it used to accept
+# exactly one statement).
+if_stmt <- if expr '{' sp stmt* '}' sp
+ (else if expr '{' sp stmt* '}' sp)*
+ (else '{' sp stmt* '}' sp)?
+
+# loop, break and continue go through the keyword rules so they get the same
+# word-boundary check (tc) as every other keyword.
+loop_stmt <- loop '{' sp stmt* '}' sp
+
+break_stmt <- break ';' sp
+
+continue_stmt <- continue ';' sp
+
+return_stmt <- return expr? sp ';' sp
+
+var_stmt <- var ident ':' sp type ';' sp
+
+label_stmt <- ':' sp ident ';' sp
+
+goto_stmt <- goto ident ';' sp
+
+assign_stmt <- unary_expr '=' sp expr ';' sp
+
+expr_stmt <- expr ';' sp
+
+empty_stmt <- ';' sp
+
+# expr is what statements and primaries refer to; it enters the precedence
+# chain at its lowest level.
+expr <- bool_expr
+
+bool_expr <- comp_expr (('&&' / '||') sp comp_expr)*
+
+comp_expr <- add_expr (('<=' / '>=' / '<' / '>' / '==' / '!=') sp add_expr)?
+
+add_expr <- mul_expr (('+' / '-' / '|' / '^') sp add_expr)*
+
+# A lone '&' must not take the first half of '&&': otherwise "a && b" is
+# consumed here as "a & (&b)" and bool_expr never sees the operator.
+mul_expr <- shift_expr (('*' / '/' / '%' / '&' !'&') sp mul_expr)*
+
+shift_expr <- unary_expr (('<<' / '>>') sp shift_expr)*
+
+unary_expr <- (('&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr
+
+# Postfix forms: index, call, member access, cast.
+post_expr <- primary ( '[' sp expr ']' sp
+ / '(' sp ( expr (',' sp expr)* )? (',' sp)? ')' sp
+ / '.' sp ident
+ / ':' sp type )*
+
+primary <- ident
+ / literal
+ / '(' sp expr ')' sp
+ / sizeof '(' sp expr ')' sp
+
+# In the string form the plain-character branch must stop at the closing
+# quote; a bare '.' would run to end of input and the literal could never end.
+literal <- '0x'[0-9a-fA-F]+ sp
+ / [0-9]+ sp
+ / ["] ([\\] . / !["] .)* ["] sp
+ / ['] ([\\] . / .) ['] sp
+
+# Words that ident must reject. func is listed because the type rule uses it
+# as a keyword.
+reserved <- return
+ / break
+ / sizeof
+ / if
+ / else
+ / loop
+ / continue
+ / goto
+ / var
+ / enum
+ / struct
+ / byte
+ / int
+ / void
+ / func
+
+# Each keyword is its spelling followed by tc, so "iffy" is not read as "if".
+return <- 'return' tc
+break <- 'break' tc
+sizeof <- 'sizeof' tc
+if <- 'if' tc
+else <- 'else' tc
+loop <- 'loop' tc
+continue <- 'continue' tc
+goto <- 'goto' tc
+var <- 'var' tc
+enum <- 'enum' tc
+struct <- 'struct' tc
+byte <- 'byte' tc
+int <- 'int' tc
+void <- 'void' tc
+func <- 'func' tc
+
+ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp
+
+# Token close: the next character must not continue a word; then skip blanks.
+tc <- ![a-zA-Z0-9_] sp
+
+# Whitespace and '//' line comments.
+sp <- ( [ \r\n\t]
+ / '//' (![\r\n] .)* )*
diff --git a/peg.c b/peg.c
@@ -0,0 +1,524 @@
+// Recognizer state shared by every matching function.
+struct compiler {
+ a: *alloc; // allocator; main uses it for the stack and passes it to fopen
+ f: *file; // input being parsed
+ pos: int; // count of bytes consumed so far; used as the seek offset on backtrack
+ stack: *int; // saved values of pos, one per open choice point
+ sp: int; // number of entries currently on stack
+ limit: int; // capacity of stack, in entries
+}
+
+// Result codes of the matching functions. FAIL is 0, so callers test with `!`.
+enum {
+ FAIL = 0,
+ OK = 1,
+}
+
+// Open a choice point: save the current position so that a later fail() can
+// rewind to it. Dies when the backtrack stack is already full.
+choice(c: *compiler) {
+ if c.sp == c.limit {
+ die("backtrack overflow");
+ }
+ c.stack[c.sp] = c.pos;
+ c.sp = c.sp + 1;
+}
+
+// Drop the most recent choice point and keep the current position.
+// Dies when no choice point is open.
+commit(c: *compiler) {
+ if c.sp == 0 {
+ die("backtrack underflow");
+ }
+ c.sp = c.sp - 1;
+}
+
+// Backtrack: pop the most recent choice point, restore pos from it and seek
+// the input file back to that offset. Dies when no choice point is open.
+fail(c: *compiler) {
+ if c.sp == 0 {
+ die("backtrack underflow");
+ }
+ c.sp = c.sp - 1;
+ c.pos = c.stack[c.sp];
+ fseek(c.f, c.pos);
+}
+
+// Read one byte from the input and return what fgetc returned.
+// pos advances by one unless the result is -1, the value any() treats as
+// "no character available".
+get(c: *compiler): int {
+ var ch: int;
+
+ ch = fgetc(c.f);
+ if ch != -1 {
+ c.pos = c.pos + 1;
+ }
+
+ return ch;
+}
+
+// Match the NUL-terminated string s byte for byte at the current position.
+// Returns OK with the input advanced past s. On the first mismatch it calls
+// fail(c), which pops the most recent choice point and rewinds to it, and
+// returns FAIL; a choice point must therefore be open when this is called.
+literal(c: *compiler, s: *byte): int {
+ var i: int;
+ var ch: int;
+
+ i = 0;
+ loop {
+ if !s[i] { // reached the terminating NUL: everything matched
+ break;
+ }
+
+ ch = get(c);
+ if ch != (s[i]:int) {
+ fail(c);
+ return FAIL;
+ }
+
+ i = i + 1;
+ }
+
+ return OK;
+}
+
+// Consume one byte and return OK if it equals any byte of the NUL-terminated
+// string s. s is a plain list of bytes; ranges are not interpreted here.
+// When nothing matches, calls fail(c) (pop and rewind) and returns FAIL.
+charclass(c: *compiler, s: *byte): int {
+ var i: int;
+ var ch: int;
+
+ ch = get(c);
+
+ i = 0;
+ loop {
+ if !s[i] { // end of the list without a match
+ break;
+ }
+
+ if ch == (s[i]:int) {
+ return OK;
+ }
+
+ i = i + 1;
+ }
+
+ fail(c);
+ return FAIL;
+}
+
+// Consume one byte of any value. Returns OK unless get() returned -1, in which
+// case it calls fail(c) (pop and rewind) and returns FAIL.
+any(c: *compiler): int {
+ var ch: int;
+ ch = get(c);
+ if ch == -1 {
+ fail(c);
+ return FAIL;
+ }
+ return OK;
+}
+
+// grammar <- sp rule+ !.
+// Returns OK when the input is leading blanks, one or more rules, and then
+// nothing else.
+p_grammar(c: *compiler): int {
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ if !p_rule(c) { // the first rule is mandatory
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // one choice point per additional rule
+ if !p_rule(c) {
+ break;
+ }
+ commit(c);
+ }
+
+ choice(c);
+ if !any(c) { // !. : no byte left; any() has already popped the choice point
+ return OK;
+ }
+ fail(c); // a byte was read: undo the lookahead
+ fail(c); // and fail the rule itself by popping one more choice point
+
+ return FAIL;
+}
+
+// rule <- ident '<-' sp pattern
+// A sequence: each element must match in turn. An element that fails has
+// already called fail(), so FAIL is simply passed on.
+p_rule(c: *compiler): int {
+ if !p_ident(c) {
+ return FAIL;
+ }
+
+ if !literal(c, "<-") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ if !p_pattern(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// pattern <- alt ( '/' sp alt )*
+// One mandatory alternative, then any number of '/'-separated ones.
+p_pattern(c: *compiler): int {
+ if !p_alt(c) {
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // start of one optional "'/' sp alt" repetition
+
+ if !literal(c, "/") {
+ break;
+ }
+
+ if !p_sp(c) {
+ break;
+ }
+
+ if !p_alt(c) {
+ break;
+ }
+
+ commit(c); // repetition matched: keep what it consumed
+ }
+
+ return OK;
+}
+
+// preop <- [!&] sp
+// A prefix operator: '!' or '&', followed by optional blanks.
+p_preop(c: *compiler): int {
+ if !charclass(c, "!&") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// alt <- ( preop? suffix )+
+// A sequence of one or more suffixes, each optionally preceded by a preop.
+p_alt(c: *compiler): int {
+ choice(c); // preop? : optional, so it gets its own choice point
+ if p_preop(c) {
+ commit(c);
+ }
+
+ if !p_suffix(c) { // the first element is mandatory
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // start of one further "preop? suffix" repetition
+
+ choice(c); // preop? inside the repetition
+ if p_preop(c) {
+ commit(c);
+ }
+
+
+ if !p_suffix(c) {
+ break;
+ }
+
+ commit(c);
+ }
+
+ return OK;
+}
+
+// postop <- [*+?] sp
+// A repetition operator: '*', '+' or '?', followed by optional blanks.
+p_postop(c: *compiler): int {
+ if !charclass(c, "*+?") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// suffix <- primary postop*
+// A primary followed by any number of repetition operators.
+p_suffix(c: *compiler): int {
+ if !p_primary(c) {
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // one choice point per optional postop
+
+ if !p_postop(c) {
+ break;
+ }
+
+ commit(c);
+ }
+
+ return OK;
+}
+
+// primary <- group / any / literal / charclass / nonterminal
+// Ordered choice: each alternative is tried under its own choice point and
+// the first one that matches wins.
+p_primary(c: *compiler): int {
+ choice(c);
+ if p_group(c) {
+ commit(c);
+ return OK;
+ }
+
+ choice(c);
+ if p_any(c) {
+ commit(c);
+ return OK;
+ }
+
+ choice(c);
+ if p_literal(c) {
+ commit(c);
+ return OK;
+ }
+
+ choice(c);
+ if p_charclass(c) {
+ commit(c);
+ return OK;
+ }
+
+ choice(c);
+ if p_nonterminal(c) {
+ commit(c);
+ return OK;
+ }
+
+ fail(c); // no alternative matched: fail the rule as a whole
+ return FAIL;
+}
+
+// group <- '(' sp pattern ')' sp
+// A parenthesised pattern. An element that fails has already called fail(),
+// so FAIL is simply passed on.
+p_group(c: *compiler): int {
+ if !literal(c, "(") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ if !p_pattern(c) {
+ return FAIL;
+ }
+
+ if !literal(c, ")") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// any <- '.' sp
+// The "any character" primary of the grammar notation: a dot, then blanks.
+p_any(c: *compiler): int {
+ if !literal(c, ".") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// literal <- ['] ( !['] . )* ['] sp
+// A quoted string. Like p_group and p_any, a failure here has to pop the
+// choice point opened by the caller, and nothing else. The two quotes are
+// therefore matched without a private choice point: wrapping them in one made
+// literal()'s fail() pop only that private entry and left the caller's entry
+// stranded on the backtrack stack.
+p_literal(c: *compiler): int {
+ if !literal(c, "'") {
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // one "!['] ." repetition
+
+ choice(c); // negative lookahead for the closing quote
+ if literal(c, "'") {
+ fail(c); // un-read the quote
+ fail(c); // and end the repetition
+ break;
+ }
+
+ if !any(c) { // end of input: any() popped the repetition
+ break;
+ }
+
+ commit(c);
+ }
+
+ if !literal(c, "'") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// charclass <- '[' ( !']' ( . '-' . / . ) )* ']' sp
+// A bracketed character class. Like p_group and p_any, a failure here has to
+// pop the choice point opened by the caller, and nothing else. The brackets
+// are therefore matched without a private choice point: wrapping them in one
+// made literal()'s fail() pop only that private entry and left the caller's
+// entry stranded on the backtrack stack.
+p_charclass(c: *compiler): int {
+ if !literal(c, "[") {
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // one class item
+
+ choice(c); // negative lookahead for the closing bracket
+ if literal(c, "]") {
+ fail(c); // un-read the bracket
+ fail(c); // and end the repetition
+ break;
+ }
+
+ if !any(c) { // end of input: any() popped the item's choice point
+ break;
+ }
+
+ choice(c); // optional "'-' ." turning the item into a range
+ if literal(c, "-") {
+ if any(c) {
+ commit(c);
+ }
+ }
+
+ commit(c);
+ }
+
+ if !literal(c, "]") {
+ return FAIL;
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// nonterminal <- ident !'<-'
+// A reference to another rule. The lookahead rejects an identifier that is
+// followed by '<-', which is how a rule definition begins.
+p_nonterminal(c: *compiler): int {
+ if !p_ident(c) {
+ return FAIL;
+ }
+
+ choice(c);
+ if !literal(c, "<-") { // no arrow ahead; literal() already popped the choice point
+ return OK;
+ }
+ fail(c); // arrow found: un-read it
+ fail(c); // and fail the rule by popping one more choice point
+ return FAIL;
+}
+
+// ident <- [a-zA-Z]+ sp
+// One or more ASCII letters, then optional blanks. Digits and '_' are not in
+// the accepted set.
+p_ident(c: *compiler): int {
+ var chars: *byte;
+
+ chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ if !charclass(c, chars) { // the first letter is mandatory
+ return FAIL;
+ }
+
+ loop {
+ choice(c); // one choice point per further letter
+
+ if !charclass(c, chars) {
+ break;
+ }
+
+ commit(c);
+ }
+
+ if !p_sp(c) {
+ return FAIL;
+ }
+
+ return OK;
+}
+
+// sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )*
+// Skip blanks and '#' comments running to end of line. Always returns OK.
+// Every choice point opened here is closed again before returning, so the
+// backtrack stack has the same depth on exit as on entry.
+p_sp(c: *compiler): int {
+ loop {
+ choice(c); // one repetition of the outer ( ... )*
+
+ choice(c); // alternative 1: a single blank
+ if charclass(c, " \t\r\n") {
+ commit(c);
+ commit(c);
+ continue;
+ }
+
+ choice(c); // alternative 2: a comment
+ if literal(c, "#") {
+ commit(c);
+
+ loop {
+ choice(c); // one "![\r\n] ." repetition
+
+ choice(c); // negative lookahead for end of line
+ if charclass(c, "\r\n") {
+ fail(c); // un-read the line terminator
+ fail(c); // and end the repetition
+ break;
+ }
+
+ if !any(c) { // end of input: any() popped the repetition
+ break;
+ }
+
+ commit(c);
+ }
+
+ commit(c);
+ continue;
+ }
+
+ // Neither alternative matched, so this repetition fails. Close its choice
+ // point; leaving it open used to leak one stack entry per call and made
+ // the callers' commit()/fail() act on the wrong entry.
+ fail(c);
+ break;
+ }
+
+ return OK;
+}
+
+// Entry point: ./peg <grammar.peg>
+// Opens the grammar file, runs the recognizer over it and prints "OK", or dies
+// with "FAIL" when the file is not a valid grammar.
+main(argc: int, argv: **byte, envp: **byte) {
+ var fd: int;
+ var a: alloc;
+ var c: compiler;
+ setup_alloc(&a);
+
+ c.a = &a;
+ c.pos = 0;
+ c.sp = 0; // the backtrack stack starts out empty; this field was never set
+ c.limit = 1024;
+ c.stack = alloc(c.a, c.limit * sizeof(c.stack[0])):*int;
+
+ if argc != 2 {
+ die("usage: ./peg <grammar.peg>");
+ }
+
+ fd = open(argv[1], 0, 0);
+ if fd < 0 {
+ die("failed to open grammar");
+ }
+
+ c.f = fopen(fd, c.a);
+
+ // A failing rule pops the choice point of whoever called it. Open one for
+ // p_grammar so that a rejected input reports FAIL instead of dying with
+ // "backtrack underflow".
+ choice(&c);
+ if !p_grammar(&c) {
+ die("FAIL");
+ }
+ commit(&c);
+ fdputs(1, "OK\n");
+}
diff --git a/peg.peg b/peg.peg
@@ -0,0 +1,15 @@
+# Grammar of the grammar notation itself. Each rule below is the one quoted in
+# the comment above the matching p_<rule> function in peg.c.
+# '#' starts a comment that runs to the end of the line (see sp).
+grammar <- sp rule+ !.
+rule <- ident '<-' sp pattern
+pattern <- alt ( '/' sp alt )*
+preop <- [!&] sp
+alt <- ( preop? suffix )+
+postop <- [*+?] sp
+suffix <- primary postop*
+primary <- group / any / literal / charclass / nonterminal
+group <- '(' sp pattern ')' sp
+any <- '.' sp
+literal <- ['] ( !['] . )* ['] sp
+charclass <- '[' ( !']' ( . '-' . / . ) )* ']' sp
+nonterminal <- ident !'<-'
+ident <- [a-zA-Z]+ sp
+sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )*
diff --git a/syscall.c b/syscall.c
@@ -61,6 +61,10 @@ poll(pfd: *int, nfd: int, timeout: int): int {
return syscall(7, pfd:int, nfd, timeout, 0, 0, 0);
}
+// lseek: thin wrapper that issues syscall number 8 with fd, off and whence and
+// returns the syscall result unchanged.
+// NOTE(review): 8 is lseek in the x86-64 Linux table, which matches the
+// neighbouring poll (7) and mmap (9) -- confirm that is the only target.
+lseek(fd: int, off: int, whence: int): int {
+ return syscall(8, fd, off, whence, 0, 0, 0);
+}
+
mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int {
return syscall(9, addr, len, prot, flags, fd, off);
}