os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 5101b39a05121114a1f603ee93dc7d7a4534ebd1
parent 926ecdedf8184453554bc0a2ae8a4588d25c0f00
Author: erai <erai@omiltem.net>
Date:   Sat, 14 Sep 2024 15:07:36 -0400

Use cc3.peg to parse into parse trees

Diffstat:
M.gitignore | 1+
Mbuild.sh | 3++-
Acc3.c | 31+++++++++++++++++++++++++++++++
Mcc3.peg | 107++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mpeg.c | 29+++++++++++++++++++++++++----
Apeglib.c | 304+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 422 insertions(+), 53 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -4,6 +4,7 @@ disk cc0 cc1 cc2 +cc3 gencc parse3.c pxe diff --git a/build.sh b/build.sh @@ -14,7 +14,8 @@ ALL="${LIBS} ${CC} ${PEG} ${BOOT} ${SSHD} ${KERNEL} ${SHELL} ${BIN}" ./cc1 ${LIBS} ${CC} -o cc2 ./cc1 ${LIBS} ${PEG} -o peg -./peg peg.peg +./peg -o parse3.c cc3.peg +./cc1 -o cc3 bufio.c lib.c alloc.c syscall.c peglib.c cc3.c parse3.c ./cc2 ${LIBS} echo.c -o echo ./cc2 ${LIBS} cmp.c -o cmp diff --git a/cc3.c b/cc3.c @@ -0,0 +1,31 @@ +main(argc: int, argv: **byte, envp: **byte) { + var fd: int; + var f: *file; + var out: *file; + var peg: *peg; + var a: alloc; + var len: int; + var src: *byte; + var node: *peg_node; + setup_alloc(&a); + + fd = open(argv[1], 0, 0); + if fd < 0 { + die("open failed"); + } + + f = fopen(fd, &a); + src = freadall(f, &len); + fclose(f); + + peg = peg_new(src, len, &a); + node = peg_parse(peg); + peg_free(peg); + + out = fopen(1, &a); + peg_show(out, node); + fputc(out, '\n'); + + fflush(out); + fclose(out); +} diff --git a/cc3.peg b/cc3.peg @@ -1,21 +1,23 @@ grammar <- sp (enum_decl / struct_decl / func_decl)* !. -enum_decl <- enum ident '{' sp (ident (',' ident)*)? (',' sp)? '}' sp +enum_def <- ident sp ('=' sp expr)? +enum_decl <- enum sp '{' sp (enum_def (',' sp enum_def)*)? (',' sp)? '}' sp -struct_decl <- struct ident '{' sp (ident ':' sp type ';' sp)* '}' sp +struct_def <- ident sp ':' sp type ';' sp +struct_decl <- struct sp ident sp '{' sp struct_def* '}' sp -func_decl <- ident func_type (';' / '{' sp stmt* '}' ) sp +func_decl <- ident sp func_type (';' / '{' sp stmt* '}') sp -type <- ident - / byte - / int - / void +type <- ident sp + / byte sp + / int sp + / void sp + / func sp func_type / '*' sp type / '(' sp type ')' sp - / func func_type func_type <- '(' sp - ( ident ':' sp type (',' sp ident ':' sp type)* )? + ( ident sp ':' sp type (',' sp ident sp ':' sp type)* )? ( ',' sp )? ')' sp (':' sp type)? @@ -30,24 +32,25 @@ stmt <- if_stmt / assign_stmt / expr_stmt / empty_stmt + / compound_stmt -if_stmt <- if expr '{' sp stmt* '}' sp - (else if expr '{' sp stmt* '}' sp)* - (else '{' sp stmt '}' sp)? +if_stmt <- if sp expr '{' sp stmt* '}' sp + (else sp if sp expr '{' sp stmt* '}' sp)* + (else sp '{' sp stmt* '}' sp)? -loop_stmt <- loop '{' sp stmt* '}' sp +loop_stmt <- loop sp '{' sp stmt* '}' sp -break_stmt <- break ';' sp +break_stmt <- break sp ';' sp continue_stmt <- continue sp ';' sp -return_stmt <- return expr? sp ';' sp +return_stmt <- return sp expr? sp ';' sp -var_stmt <- var ident ':' sp type ';' sp +var_stmt <- var sp ident sp ':' sp type ';' sp -label_stmt <- ':' sp ident ';' sp +label_stmt <- ':' sp ident sp ';' sp -goto_stmt <- goto ident ';' sp +goto_stmt <- goto sp ident sp ';' sp assign_stmt <- unary_expr '=' sp expr ';' sp @@ -55,32 +58,40 @@ expr_stmt <- expr ';' sp empty_stmt <- ';' sp +compound_stmt <- '{' sp stmt* '}' sp + +expr <- bool_expr + bool_expr <- comp_expr (('&&' / '||') sp comp_expr)* comp_expr <- add_expr (('<=' / '>=' / '<' / '>' / '==' / '!=') sp add_expr)? -add_expr <- mul_expr (('+' / '-' / '|' / '^') sp add_expr)* +add_expr <- mul_expr (('+' / '-' / '|' !'|' / '^') sp add_expr)* -mul_expr <- shift_expr (('*' / '/' / '%' / '&') sp mul_expr)* +mul_expr <- shift_expr (('*' / '/' / '%' / '&' !'&') sp mul_expr)* shift_expr <- unary_expr (('<<' / '>>') sp shift_expr)* -unary_expr <- (('&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr +unary_expr <- (('&' !'&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr post_expr <- primary ( '[' sp expr ']' sp / '(' sp ( expr (',' sp expr)* )? (',' sp)? ')' sp - / '.' sp ident + / '.' sp ident sp / ':' sp type )* -primary <- ident - / literal +primary <- ident sp + / hexadecimal sp + / decimal sp + / string sp + / character sp / '(' sp expr ')' sp - / sizeof '(' sp expr ')' sp + / sizeof sp '(' sp expr ')' sp -literal <- '0x'[0-9a-fA-F]+ sp - / [0-9]+ sp - / ["] ([\\] . / .)* ["] sp - / ['] ([\\] . / .) ['] sp +hexadecimal <- '0x'[0-9a-fA-F]+ +decimal <- [0-9]+ + +string <- ["] ([\\] . / !["] .)* ["] +character <- ['] ([\\] . / !['] .) ['] reserved <- return / break @@ -96,24 +107,24 @@ reserved <- return / byte / int / void - -return <- 'return' tc -break <- 'break' tc -sizeof <- 'sizeof' tc -if <- 'if' tc -else <- 'else' tc -loop <- 'loop' tc -continue <- 'continue' tc -goto <- 'goto' tc -var <- 'var' tc -enum <- 'enum' tc -struct <- 'struct' tc -byte <- 'byte' tc -int <- 'int' tc -void <- 'void' tc - -ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp - -tc <- ![a-zA-Z0-9_] sp + / func + +return <- 'return' ![a-zA-Z0-9_] +break <- 'break' ![a-zA-Z0-9_] +sizeof <- 'sizeof' ![a-zA-Z0-9_] +if <- 'if' ![a-zA-Z0-9_] +else <- 'else' ![a-zA-Z0-9_] +loop <- 'loop' ![a-zA-Z0-9_] +continue <- 'continue' ![a-zA-Z0-9_] +goto <- 'goto' ![a-zA-Z0-9_] +var <- 'var' ![a-zA-Z0-9_] +enum <- 'enum' ![a-zA-Z0-9_] +struct <- 'struct' ![a-zA-Z0-9_] +byte <- 'byte' ![a-zA-Z0-9_] +int <- 'int' ![a-zA-Z0-9_] +void <- 'void' ![a-zA-Z0-9_] +func <- 'func' ![a-zA-Z0-9_] + +ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp <- ( [ \r\n\t] / '//' (![\r\n] .)* )* diff --git a/peg.c b/peg.c @@ -776,7 +776,7 @@ translate_literal(c: *peg, n: *peg_node) { ch = n.str[i]:int; - if ch < 32 || ch > 127 { + if ch < 32 || ch > 127 || ch == '\\' || ch == '"' { fputc(c.fout, '\\'); fputc(c.fout, 'x'); fputc(c.fout, hex[ch >> 4]:int); @@ -914,7 +914,7 @@ translate_charset(c: *peg, n: *peg_node) { } if c.scratch[i] { - if i < 32 || i > 127 { + if ch < 32 || ch > 127 || ch == '\\' || ch == '"' { fputc(c.fout, '\\'); fputc(c.fout, 'x'); fputc(c.fout, hex[i >> 4]:int); @@ -1002,7 +1002,7 @@ translate_pattern(c: *peg, n: *peg_node) { fputs(c.fout, " loop {\n"); fputs(c.fout, " choice(c);\n"); translate_pattern(c, n.child); - fputs(c.fout, " if !ok { break }\n"); + fputs(c.fout, " if !ok { ok = 1; break; }\n"); fputs(c.fout, " commit(c);\n"); fputs(c.fout, " }\n"); } else if count == ONE_OR_MORE { @@ -1011,7 +1011,7 @@ translate_pattern(c: *peg, n: *peg_node) { fputs(c.fout, " loop {\n"); fputs(c.fout, " choice(c);\n"); translate_pattern(c, n.child); - fputs(c.fout, " if !ok { break }\n"); + fputs(c.fout, " if !ok { ok = 1; break; }\n"); fputs(c.fout, " commit(c);\n"); fputs(c.fout, " }\n"); fputs(c.fout, " }\n"); @@ -1067,6 +1067,27 @@ translate(c: *peg, n: *peg_node) { } fputs(c.fout, "}\n"); + // Generate tag to string + fputs(c.fout, "\ntag_to_str(tag: int): *byte {\n"); + v = n.child; + loop { + if !v { + break; + } + + if v.tag == P_rule { + fputs(c.fout, " if tag == P_"); + fputb(c.fout, v.child.str, v.child.len); + fputs(c.fout, " { return \""); + fputb(c.fout, v.child.str, v.child.len); + fputs(c.fout, "\"; }\n"); + } + + v = v.next; + } + fputs(c.fout, " die(\"invalid tag\");\n"); + fputs(c.fout, "}\n"); + // Generate parsing functions for each rule v = n.child; loop { diff --git a/peglib.c b/peglib.c @@ -0,0 +1,304 @@ +struct peg_frame { + pos: int; + depth: int; + op: int; +} + +struct peg_op { + tag: int; + nargs: int; + start: int; + end: int; +} + +struct peg { + a: *alloc; + + src: *byte; + size: int; + pos: int; + + stack: *peg_frame; + sp: int; + limit: int; + + depth: int; + op: int; + out: *peg_op; + cap: int; + + nstack: **peg_node; + np: int; + ncap: int; +} + +struct peg_node { + tag: int; + next: *peg_node; + child: *peg_node; + str: *byte; + len: int; +} + +choice(c: *peg) { + if c.sp == c.limit { + die("choice overflow"); + } + c.stack[c.sp].pos = c.pos; + c.stack[c.sp].depth = c.depth; + c.stack[c.sp].op = c.op; + c.sp = c.sp + 1; +} + +commit(c: *peg) { + if c.sp == 0 { + die("commit underflow"); + } + c.sp = c.sp - 1; +} + +fail(c: *peg) { + if c.sp == 0 { + die("fail underflow"); + } + c.sp = c.sp - 1; + c.pos = c.stack[c.sp].pos; + c.depth = c.stack[c.sp].depth; + c.op = c.stack[c.sp].op; +} + +get(c: *peg): int { + var ch: int; + + if c.pos == c.size { + return -1; + } + + ch = c.src[c.pos]:int; + c.pos = c.pos + 1; + + return ch; +} + +literal(c: *peg, s: *byte): int { + var i: int; + var ch: int; + + i = 0; + loop { + if !s[i] { + break; + } + + ch = get(c); + if ch != (s[i]:int) { + fail(c); + return 0; + } + + i = i + 1; + } + + return 1; +} + +enter(c: *peg) { + choice(c); +} + +leave(c: *peg, tag: int) { + var nargs: int; + var start: int; + var end: int; + var tmp: *byte; + + commit(c); + + nargs = c.depth - c.stack[c.sp].depth; + start = c.stack[c.sp].pos; + end = c.pos; + + if c.op == c.cap { + if c.cap == 0 { + c.cap = 1024; + c.out = alloc(c.a, c.cap * sizeof(c.out[0])):*peg_op; + } else { + c.cap = c.cap * 2; + tmp = alloc(c.a, c.cap * sizeof(c.out[0])); + memcpy(tmp, c.out:*byte, c.op * sizeof(c.out[0])); + free(c.a, c.out:*byte); + c.out = tmp:*peg_op; + } + } + + c.out[c.op].tag = tag; + c.out[c.op].nargs = nargs; + c.out[c.op].start = start; + c.out[c.op].end = end; + + c.op = c.op + 1; + c.depth = c.depth - nargs + 1; +} + +charset(c: *peg, s: *byte): int { + var i: int; + var ch: int; + + ch = get(c); + + i = 0; + loop { + if !s[i] { + fail(c); + return 0; + } + + if ch == (s[i]:int) { + break; + } + + i = i + 1; + } + + return 1; +} + +any(c: *peg): int { + var ch: int; + ch = get(c); + if ch == -1 { + fail(c); + return 0; + } + return 1; +} + +construct(c: *peg): *peg_node { + var i: int; + var j: int; + var nargs: int; + var n: *peg_node; + var link: **peg_node; + + c.nstack[0] = 0:*peg_node; + + i = 0; + loop { + if i == c.op { + return c.nstack[0]; + } + + n = alloc(c.a, sizeof(*n)):*peg_node; + + n.tag = c.out[i].tag; + n.next = 0:*peg_node; + n.child = 0:*peg_node; + n.str = &c.src[c.out[i].start]; + n.len = c.out[i].end - c.out[i].start; + + nargs = c.out[i].nargs; + if nargs > c.np { + die("node underflow"); + } + + link = &n.child; + j = c.np - nargs; + loop { + if j == c.np { + break; + } + + *link = c.nstack[j]; + link = &c.nstack[j].next; + + j = j + 1; + } + + c.np = c.np - nargs; + if c.np == c.ncap { + die("node overflow"); + } + + c.nstack[c.np] = n; + c.np = c.np + 1; + + i = i + 1; + } +} + +peg_new(src: *byte, len: int, a: *alloc): *peg { + var c: *peg; + + c = alloc(a, sizeof(*c)):*peg; + + c.a = a; + + c.src = src; + c.size = len; + c.pos = 0; + + c.limit = 1024; + c.stack = alloc(a, c.limit * sizeof(*c.stack)):*peg_frame; + c.sp = 0; + + c.depth = 0; + c.op = 0; + c.out = 0:*peg_op; + c.cap = 0; + + c.ncap = 1024; + c.nstack = alloc(a, c.ncap * sizeof(*c.nstack)):**peg_node; + c.np = 0; + + return c; +} + +peg_parse(c: *peg): *peg_node { + choice(c); + if !p_grammar(c) { + die("syntax error"); + } + commit(c); + return construct(c); +} + +peg_reset(c: *peg, src: *byte, len: int) { + c.src = src; + c.size = len; + c.pos = 0; + c.depth = 0; + c.sp = 0; + c.op = 0; + c.np = 0; +} + +peg_free(c: *peg) { + free(c.a, c.stack: *byte); + free(c.a, c.nstack: *byte); + free(c.a, c.out: *byte); + free(c.a, c: *byte); +} + +peg_show(out: *file, n: *peg_node) { + fputs(out, "("); + fputs(out, tag_to_str(n.tag)); + if n.child { + n = n.child; + loop { + if !n { + break; + } + + fputc(out, ' '); + peg_show(out, n); + + n = n.next; + } + } else { + fputc(out, ' '); + fputc(out, '"'); + fputb(out, n.str, n.len); + fputc(out, '"'); + } + fputs(out, ")"); +}