commit 5101b39a05121114a1f603ee93dc7d7a4534ebd1
parent 926ecdedf8184453554bc0a2ae8a4588d25c0f00
Author: erai <erai@omiltem.net>
Date: Sat, 14 Sep 2024 15:07:36 -0400
Use cc3.peg to parse into parse trees
Diffstat:
M | .gitignore | | | 1 | + |
M | build.sh | | | 3 | ++- |
A | cc3.c | | | 31 | +++++++++++++++++++++++++++++++ |
M | cc3.peg | | | 107 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
M | peg.c | | | 29 | +++++++++++++++++++++++++---- |
A | peglib.c | | | 304 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 422 insertions(+), 53 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@ disk
cc0
cc1
cc2
+cc3
gencc
parse3.c
pxe
diff --git a/build.sh b/build.sh
@@ -14,7 +14,8 @@ ALL="${LIBS} ${CC} ${PEG} ${BOOT} ${SSHD} ${KERNEL} ${SHELL} ${BIN}"
./cc1 ${LIBS} ${CC} -o cc2
./cc1 ${LIBS} ${PEG} -o peg
-./peg peg.peg
+./peg -o parse3.c cc3.peg
+./cc1 -o cc3 bufio.c lib.c alloc.c syscall.c peglib.c cc3.c parse3.c
./cc2 ${LIBS} echo.c -o echo
./cc2 ${LIBS} cmp.c -o cmp
diff --git a/cc3.c b/cc3.c
@@ -0,0 +1,31 @@
+main(argc: int, argv: **byte, envp: **byte) {
+ var fd: int;
+ var f: *file;
+ var out: *file;
+ var peg: *peg;
+ var a: alloc;
+ var len: int;
+ var src: *byte;
+ var node: *peg_node;
+ setup_alloc(&a);
+
+ fd = open(argv[1], 0, 0);
+ if fd < 0 {
+ die("open failed");
+ }
+
+ f = fopen(fd, &a);
+ src = freadall(f, &len);
+ fclose(f);
+
+ peg = peg_new(src, len, &a);
+ node = peg_parse(peg);
+ peg_free(peg);
+
+ out = fopen(1, &a);
+ peg_show(out, node);
+ fputc(out, '\n');
+
+ fflush(out);
+ fclose(out);
+}
diff --git a/cc3.peg b/cc3.peg
@@ -1,21 +1,23 @@
grammar <- sp (enum_decl / struct_decl / func_decl)* !.
-enum_decl <- enum ident '{' sp (ident (',' ident)*)? (',' sp)? '}' sp
+enum_def <- ident sp ('=' sp expr)?
+enum_decl <- enum sp '{' sp (enum_def (',' sp enum_def)*)? (',' sp)? '}' sp
-struct_decl <- struct ident '{' sp (ident ':' sp type ';' sp)* '}' sp
+struct_def <- ident sp ':' sp type ';' sp
+struct_decl <- struct sp ident sp '{' sp struct_def* '}' sp
-func_decl <- ident func_type (';' / '{' sp stmt* '}' ) sp
+func_decl <- ident sp func_type (';' / '{' sp stmt* '}') sp
-type <- ident
- / byte
- / int
- / void
+type <- ident sp
+ / byte sp
+ / int sp
+ / void sp
+ / func sp func_type
/ '*' sp type
/ '(' sp type ')' sp
- / func func_type
func_type <- '(' sp
- ( ident ':' sp type (',' sp ident ':' sp type)* )?
+ ( ident sp ':' sp type (',' sp ident sp ':' sp type)* )?
( ',' sp )?
')' sp (':' sp type)?
@@ -30,24 +32,25 @@ stmt <- if_stmt
/ assign_stmt
/ expr_stmt
/ empty_stmt
+ / compound_stmt
-if_stmt <- if expr '{' sp stmt* '}' sp
- (else if expr '{' sp stmt* '}' sp)*
- (else '{' sp stmt '}' sp)?
+if_stmt <- if sp expr '{' sp stmt* '}' sp
+ (else sp if sp expr '{' sp stmt* '}' sp)*
+ (else sp '{' sp stmt* '}' sp)?
-loop_stmt <- loop '{' sp stmt* '}' sp
+loop_stmt <- loop sp '{' sp stmt* '}' sp
-break_stmt <- break ';' sp
+break_stmt <- break sp ';' sp
continue_stmt <- continue sp ';' sp
-return_stmt <- return expr? sp ';' sp
+return_stmt <- return sp expr? sp ';' sp
-var_stmt <- var ident ':' sp type ';' sp
+var_stmt <- var sp ident sp ':' sp type ';' sp
-label_stmt <- ':' sp ident ';' sp
+label_stmt <- ':' sp ident sp ';' sp
-goto_stmt <- goto ident ';' sp
+goto_stmt <- goto sp ident sp ';' sp
assign_stmt <- unary_expr '=' sp expr ';' sp
@@ -55,32 +58,40 @@ expr_stmt <- expr ';' sp
empty_stmt <- ';' sp
+compound_stmt <- '{' sp stmt* '}' sp
+
+expr <- bool_expr
+
bool_expr <- comp_expr (('&&' / '||') sp comp_expr)*
comp_expr <- add_expr (('<=' / '>=' / '<' / '>' / '==' / '!=') sp add_expr)?
-add_expr <- mul_expr (('+' / '-' / '|' / '^') sp add_expr)*
+add_expr <- mul_expr (('+' / '-' / '|' !'|' / '^') sp add_expr)*
-mul_expr <- shift_expr (('*' / '/' / '%' / '&') sp mul_expr)*
+mul_expr <- shift_expr (('*' / '/' / '%' / '&' !'&') sp mul_expr)*
shift_expr <- unary_expr (('<<' / '>>') sp shift_expr)*
-unary_expr <- (('&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr
+unary_expr <- (('&' !'&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr
post_expr <- primary ( '[' sp expr ']' sp
/ '(' sp ( expr (',' sp expr)* )? (',' sp)? ')' sp
- / '.' sp ident
+ / '.' sp ident sp
/ ':' sp type )*
-primary <- ident
- / literal
+primary <- ident sp
+ / hexadecimal sp
+ / decimal sp
+ / string sp
+ / character sp
/ '(' sp expr ')' sp
- / sizeof '(' sp expr ')' sp
+ / sizeof sp '(' sp expr ')' sp
-literal <- '0x'[0-9a-fA-F]+ sp
- / [0-9]+ sp
- / ["] ([\\] . / .)* ["] sp
- / ['] ([\\] . / .) ['] sp
+hexadecimal <- '0x'[0-9a-fA-F]+
+decimal <- [0-9]+
+
+string <- ["] ([\\] . / !["] .)* ["]
+character <- ['] ([\\] . / !['] .) [']
reserved <- return
/ break
@@ -96,24 +107,24 @@ reserved <- return
/ byte
/ int
/ void
-
-return <- 'return' tc
-break <- 'break' tc
-sizeof <- 'sizeof' tc
-if <- 'if' tc
-else <- 'else' tc
-loop <- 'loop' tc
-continue <- 'continue' tc
-goto <- 'goto' tc
-var <- 'var' tc
-enum <- 'enum' tc
-struct <- 'struct' tc
-byte <- 'byte' tc
-int <- 'int' tc
-void <- 'void' tc
-
-ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp
-
-tc <- ![a-zA-Z0-9_] sp
+ / func
+
+return <- 'return' ![a-zA-Z0-9_]
+break <- 'break' ![a-zA-Z0-9_]
+sizeof <- 'sizeof' ![a-zA-Z0-9_]
+if <- 'if' ![a-zA-Z0-9_]
+else <- 'else' ![a-zA-Z0-9_]
+loop <- 'loop' ![a-zA-Z0-9_]
+continue <- 'continue' ![a-zA-Z0-9_]
+goto <- 'goto' ![a-zA-Z0-9_]
+var <- 'var' ![a-zA-Z0-9_]
+enum <- 'enum' ![a-zA-Z0-9_]
+struct <- 'struct' ![a-zA-Z0-9_]
+byte <- 'byte' ![a-zA-Z0-9_]
+int <- 'int' ![a-zA-Z0-9_]
+void <- 'void' ![a-zA-Z0-9_]
+func <- 'func' ![a-zA-Z0-9_]
+
+ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]*
sp <- ( [ \r\n\t] / '//' (![\r\n] .)* )*
diff --git a/peg.c b/peg.c
@@ -776,7 +776,7 @@ translate_literal(c: *peg, n: *peg_node) {
ch = n.str[i]:int;
- if ch < 32 || ch > 127 {
+ if ch < 32 || ch > 127 || ch == '\\' || ch == '"' {
fputc(c.fout, '\\');
fputc(c.fout, 'x');
fputc(c.fout, hex[ch >> 4]:int);
@@ -914,7 +914,7 @@ translate_charset(c: *peg, n: *peg_node) {
}
if c.scratch[i] {
- if i < 32 || i > 127 {
+ if ch < 32 || ch > 127 || ch == '\\' || ch == '"' {
fputc(c.fout, '\\');
fputc(c.fout, 'x');
fputc(c.fout, hex[i >> 4]:int);
@@ -1002,7 +1002,7 @@ translate_pattern(c: *peg, n: *peg_node) {
fputs(c.fout, " loop {\n");
fputs(c.fout, " choice(c);\n");
translate_pattern(c, n.child);
- fputs(c.fout, " if !ok { break }\n");
+ fputs(c.fout, " if !ok { ok = 1; break; }\n");
fputs(c.fout, " commit(c);\n");
fputs(c.fout, " }\n");
} else if count == ONE_OR_MORE {
@@ -1011,7 +1011,7 @@ translate_pattern(c: *peg, n: *peg_node) {
fputs(c.fout, " loop {\n");
fputs(c.fout, " choice(c);\n");
translate_pattern(c, n.child);
- fputs(c.fout, " if !ok { break }\n");
+ fputs(c.fout, " if !ok { ok = 1; break; }\n");
fputs(c.fout, " commit(c);\n");
fputs(c.fout, " }\n");
fputs(c.fout, " }\n");
@@ -1067,6 +1067,27 @@ translate(c: *peg, n: *peg_node) {
}
fputs(c.fout, "}\n");
+ // Generate tag to string
+ fputs(c.fout, "\ntag_to_str(tag: int): *byte {\n");
+ v = n.child;
+ loop {
+ if !v {
+ break;
+ }
+
+ if v.tag == P_rule {
+ fputs(c.fout, " if tag == P_");
+ fputb(c.fout, v.child.str, v.child.len);
+ fputs(c.fout, " { return \"");
+ fputb(c.fout, v.child.str, v.child.len);
+ fputs(c.fout, "\"; }\n");
+ }
+
+ v = v.next;
+ }
+ fputs(c.fout, " die(\"invalid tag\");\n");
+ fputs(c.fout, "}\n");
+
// Generate parsing functions for each rule
v = n.child;
loop {
diff --git a/peglib.c b/peglib.c
@@ -0,0 +1,304 @@
+struct peg_frame {
+ pos: int;
+ depth: int;
+ op: int;
+}
+
+struct peg_op {
+ tag: int;
+ nargs: int;
+ start: int;
+ end: int;
+}
+
+struct peg {
+ a: *alloc;
+
+ src: *byte;
+ size: int;
+ pos: int;
+
+ stack: *peg_frame;
+ sp: int;
+ limit: int;
+
+ depth: int;
+ op: int;
+ out: *peg_op;
+ cap: int;
+
+ nstack: **peg_node;
+ np: int;
+ ncap: int;
+}
+
+struct peg_node {
+ tag: int;
+ next: *peg_node;
+ child: *peg_node;
+ str: *byte;
+ len: int;
+}
+
+choice(c: *peg) {
+ if c.sp == c.limit {
+ die("choice overflow");
+ }
+ c.stack[c.sp].pos = c.pos;
+ c.stack[c.sp].depth = c.depth;
+ c.stack[c.sp].op = c.op;
+ c.sp = c.sp + 1;
+}
+
+commit(c: *peg) {
+ if c.sp == 0 {
+ die("commit underflow");
+ }
+ c.sp = c.sp - 1;
+}
+
+fail(c: *peg) {
+ if c.sp == 0 {
+ die("fail underflow");
+ }
+ c.sp = c.sp - 1;
+ c.pos = c.stack[c.sp].pos;
+ c.depth = c.stack[c.sp].depth;
+ c.op = c.stack[c.sp].op;
+}
+
+get(c: *peg): int {
+ var ch: int;
+
+ if c.pos == c.size {
+ return -1;
+ }
+
+ ch = c.src[c.pos]:int;
+ c.pos = c.pos + 1;
+
+ return ch;
+}
+
+literal(c: *peg, s: *byte): int {
+ var i: int;
+ var ch: int;
+
+ i = 0;
+ loop {
+ if !s[i] {
+ break;
+ }
+
+ ch = get(c);
+ if ch != (s[i]:int) {
+ fail(c);
+ return 0;
+ }
+
+ i = i + 1;
+ }
+
+ return 1;
+}
+
+enter(c: *peg) {
+ choice(c);
+}
+
+leave(c: *peg, tag: int) {
+ var nargs: int;
+ var start: int;
+ var end: int;
+ var tmp: *byte;
+
+ commit(c);
+
+ nargs = c.depth - c.stack[c.sp].depth;
+ start = c.stack[c.sp].pos;
+ end = c.pos;
+
+ if c.op == c.cap {
+ if c.cap == 0 {
+ c.cap = 1024;
+ c.out = alloc(c.a, c.cap * sizeof(c.out[0])):*peg_op;
+ } else {
+ c.cap = c.cap * 2;
+ tmp = alloc(c.a, c.cap * sizeof(c.out[0]));
+ memcpy(tmp, c.out:*byte, c.op * sizeof(c.out[0]));
+ free(c.a, c.out:*byte);
+ c.out = tmp:*peg_op;
+ }
+ }
+
+ c.out[c.op].tag = tag;
+ c.out[c.op].nargs = nargs;
+ c.out[c.op].start = start;
+ c.out[c.op].end = end;
+
+ c.op = c.op + 1;
+ c.depth = c.depth - nargs + 1;
+}
+
+charset(c: *peg, s: *byte): int {
+ var i: int;
+ var ch: int;
+
+ ch = get(c);
+
+ i = 0;
+ loop {
+ if !s[i] {
+ fail(c);
+ return 0;
+ }
+
+ if ch == (s[i]:int) {
+ break;
+ }
+
+ i = i + 1;
+ }
+
+ return 1;
+}
+
+any(c: *peg): int {
+ var ch: int;
+ ch = get(c);
+ if ch == -1 {
+ fail(c);
+ return 0;
+ }
+ return 1;
+}
+
+construct(c: *peg): *peg_node {
+ var i: int;
+ var j: int;
+ var nargs: int;
+ var n: *peg_node;
+ var link: **peg_node;
+
+ c.nstack[0] = 0:*peg_node;
+
+ i = 0;
+ loop {
+ if i == c.op {
+ return c.nstack[0];
+ }
+
+ n = alloc(c.a, sizeof(*n)):*peg_node;
+
+ n.tag = c.out[i].tag;
+ n.next = 0:*peg_node;
+ n.child = 0:*peg_node;
+ n.str = &c.src[c.out[i].start];
+ n.len = c.out[i].end - c.out[i].start;
+
+ nargs = c.out[i].nargs;
+ if nargs > c.np {
+ die("node underflow");
+ }
+
+ link = &n.child;
+ j = c.np - nargs;
+ loop {
+ if j == c.np {
+ break;
+ }
+
+ *link = c.nstack[j];
+ link = &c.nstack[j].next;
+
+ j = j + 1;
+ }
+
+ c.np = c.np - nargs;
+ if c.np == c.ncap {
+ die("node overflow");
+ }
+
+ c.nstack[c.np] = n;
+ c.np = c.np + 1;
+
+ i = i + 1;
+ }
+}
+
+peg_new(src: *byte, len: int, a: *alloc): *peg {
+ var c: *peg;
+
+ c = alloc(a, sizeof(*c)):*peg;
+
+ c.a = a;
+
+ c.src = src;
+ c.size = len;
+ c.pos = 0;
+
+ c.limit = 1024;
+ c.stack = alloc(a, c.limit * sizeof(*c.stack)):*peg_frame;
+ c.sp = 0;
+
+ c.depth = 0;
+ c.op = 0;
+ c.out = 0:*peg_op;
+ c.cap = 0;
+
+ c.ncap = 1024;
+ c.nstack = alloc(a, c.ncap * sizeof(*c.nstack)):**peg_node;
+ c.np = 0;
+
+ return c;
+}
+
+peg_parse(c: *peg): *peg_node {
+ choice(c);
+ if !p_grammar(c) {
+ die("syntax error");
+ }
+ commit(c);
+ return construct(c);
+}
+
+peg_reset(c: *peg, src: *byte, len: int) {
+ c.src = src;
+ c.size = len;
+ c.pos = 0;
+ c.depth = 0;
+ c.sp = 0;
+ c.op = 0;
+ c.np = 0;
+}
+
+peg_free(c: *peg) {
+ free(c.a, c.stack: *byte);
+ free(c.a, c.nstack: *byte);
+ free(c.a, c.out: *byte);
+ free(c.a, c: *byte);
+}
+
+peg_show(out: *file, n: *peg_node) {
+ fputs(out, "(");
+ fputs(out, tag_to_str(n.tag));
+ if n.child {
+ n = n.child;
+ loop {
+ if !n {
+ break;
+ }
+
+ fputc(out, ' ');
+ peg_show(out, n);
+
+ n = n.next;
+ }
+ } else {
+ fputc(out, ' ');
+ fputc(out, '"');
+ fputb(out, n.str, n.len);
+ fputc(out, '"');
+ }
+ fputs(out, ")");
+}