os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 2ce78bedd128b7d3d0d0c5ff3ec7f4c7c252d40b
parent ddb02625fc684a1434cc0369fcd57440e24d251e
Author: erai <erai@omiltem.net>
Date:   Thu, 12 Sep 2024 23:46:33 -0400

peg parse peg.peg into postfix notation

Diffstat:
Mbufio.c | 38+++++++++++++++++++++++++++++++++++++-
Mcc3.peg | 9++++-----
Mpeg.c | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mpeg.peg | 28++++++++++++++--------------
4 files changed, 379 insertions(+), 181 deletions(-)

diff --git a/bufio.c b/bufio.c @@ -158,5 +158,41 @@ fseek(f: *file, off: int) { f.r = 0; f.w = 0; f.eof = 0; - lseek(f.fd, off, 0); + if lseek(f.fd, off, 0) != off { + die("invalid seek"); + } +} + +freadall(f: *file, size: *int): *byte { + var i: int; + var cap: int; + var ret: *byte; + var tmp: *byte; + var ch: int; + + i = 0; + cap = 0; + loop { + ch = fgetc(f); + if ch == -1 { + *size = i; + return ret; + } + + if i == cap { + if cap == 0 { + cap = 4096; + ret = alloc(f.a, cap); + } else { + cap = cap * 2; + tmp = alloc(f.a, cap); + memcpy(tmp, ret, i); + free(f.a, ret); + ret = tmp; + } + } + + ret[i] = ch:byte; + i = i + 1; + } } diff --git a/cc3.peg b/cc3.peg @@ -35,11 +35,11 @@ if_stmt <- if expr '{' sp stmt* '}' sp (else if expr '{' sp stmt* '}' sp)* (else '{' sp stmt '}' sp)? -loop_stmt <- 'loop' sp '{' sp stmt* '}' sp +loop_stmt <- loop '{' sp stmt* '}' sp -break_stmt <- 'break' sp ';' sp +break_stmt <- break ';' sp -continue_stmt <- 'continue' sp ';' sp +continue_stmt <- continue sp ';' sp return_stmt <- return expr? sp ';' sp @@ -116,5 +116,4 @@ ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp tc <- ![a-zA-Z0-9_] sp -sp <- ( [ \r\n\t] - / '//' (![\r\n] .)* )* +sp <- ( [ \r\n\t] / '//' (![\r\n] .)* )* diff --git a/peg.c b/peg.c @@ -1,10 +1,29 @@ -struct compiler { +struct peg_frame { + pos: int; + depth: int; + op: int; +} + +struct peg_op { + tag: int; + nargs: int; + start: int; + end: int; +} + +struct peg { a: *alloc; f: *file; + src: *byte; + size: int; pos: int; - stack: *int; + stack: *peg_frame; sp: int; limit: int; + depth: int; + op: int; + out: *peg_op; + cap: int; } enum { @@ -12,42 +31,47 @@ enum { OK = 1, } -choice(c: *compiler) { +choice(c: *peg) { if c.sp == c.limit { - die("backtrack overflow"); + die("choice overflow"); } - c.stack[c.sp] = c.pos; + c.stack[c.sp].pos = c.pos; + c.stack[c.sp].depth = c.depth; + c.stack[c.sp].op = c.op; c.sp = c.sp + 1; } -commit(c: *compiler) { +commit(c: *peg) { if c.sp == 0 { - die("backtrack underflow"); + die("commit underflow"); } c.sp = c.sp - 1; } -fail(c: *compiler) { +fail(c: *peg) { if c.sp == 0 { - die("backtrack underflow"); + die("fail underflow"); } c.sp = c.sp - 1; - c.pos = c.stack[c.sp]; - fseek(c.f, c.pos); + c.pos = c.stack[c.sp].pos; + c.depth = c.stack[c.sp].depth; + c.op = c.stack[c.sp].op; } -get(c: *compiler): int { +get(c: *peg): int { var ch: int; - ch = fgetc(c.f); - if ch != -1 { - c.pos = c.pos + 1; + if c.pos == c.size { + return -1; } + ch = c.src[c.pos]:int; + c.pos = c.pos + 1; + return ch; } -literal(c: *compiler, s: *byte): int { +literal(c: *peg, s: *byte): int { var i: int; var ch: int; @@ -69,7 +93,82 @@ literal(c: *compiler, s: *byte): int { return OK; } -charclass(c: *compiler, s: *byte): int { +enter(c: *peg) { + choice(c); +} + +leave(c: *peg, tag: int) { + var nargs: int; + var start: int; + var end: int; + var tmp: *byte; + + commit(c); + + nargs = c.depth - c.stack[c.sp].depth; + start = c.stack[c.sp].pos; + end = c.pos; + + if c.op == c.cap { + if c.cap == 0 { + c.cap = 1024; + c.out = alloc(c.a, c.cap * sizeof(c.out[0])):*peg_op; + } else { + c.cap = c.cap * 2; + tmp = alloc(c.a, c.cap * sizeof(c.out[0])); + memcpy(tmp, c.out:*byte, c.op * sizeof(c.out[0])); + free(c.a, c.out:*byte); + c.out = tmp:*peg_op; + } + } + + c.out[c.op].tag = tag; + c.out[c.op].nargs = nargs; + c.out[c.op].start = start; + c.out[c.op].end = end; + + c.op = c.op + 1; + c.depth = c.depth - nargs + 1; +} + +enum { + P_grammar = 1, + P_rule, + P_pattern, + P_alternative, + P_lookop, + P_lookahead, + P_countop, + P_suffix, + P_primary, + P_any, + P_literal, + P_class, + P_call, + P_identifier, + P_sp, +} + +tag_to_str(tag: int): *byte { + if tag == P_grammar { return "P_grammar"; } + if tag == P_rule { return "P_rule"; } + if tag == P_pattern { return "P_pattern"; } + if tag == P_alternative { return "P_alternative"; } + if tag == P_lookop { return "P_lookop"; } + if tag == P_lookahead { return "P_lookahead"; } + if tag == P_countop { return "P_countop"; } + if tag == P_suffix { return "P_suffix"; } + if tag == P_primary { return "P_primary"; } + if tag == P_any { return "P_any"; } + if tag == P_literal { return "P_literal"; } + if tag == P_class { return "P_class"; } + if tag == P_call { return "P_call"; } + if tag == P_identifier { return "P_identifier"; } + if tag == P_sp { return "P_sp"; } + return "(invalid)"; +} + +charclass(c: *peg, s: *byte): int { var i: int; var ch: int; @@ -78,21 +177,21 @@ charclass(c: *compiler, s: *byte): int { i = 0; loop { if !s[i] { - break; + fail(c); + return FAIL; } if ch == (s[i]:int) { - return OK; + break; } i = i + 1; } - fail(c); - return FAIL; + return OK; } -any(c: *compiler): int { +any(c: *peg): int { var ch: int; ch = get(c); if ch == -1 { @@ -103,12 +202,16 @@ any(c: *compiler): int { } // grammar <- sp rule+ !. -p_grammar(c: *compiler): int { +p_grammar(c: *peg): int { + enter(c); + if !p_sp(c) { + fail(c); return FAIL; } if !p_rule(c) { + fail(c); return FAIL; } @@ -121,39 +224,55 @@ p_grammar(c: *compiler): int { } choice(c); - if !any(c) { - return OK; + if any(c) { + fail(c); + fail(c); + return FAIL; } - fail(c); - fail(c); - return FAIL; + leave(c, P_grammar); + return OK; } -// rule <- ident '<-' sp pattern -p_rule(c: *compiler): int { - if !p_ident(c) { +// rule <- identifier sp '<-' sp pattern +p_rule(c: *peg): int { + enter(c); + + if !p_identifier(c) { + fail(c); + return FAIL; + } + + if !p_sp(c) { + fail(c); return FAIL; } if !literal(c, "<-") { + fail(c); return FAIL; } if !p_sp(c) { + fail(c); return FAIL; } if !p_pattern(c) { + fail(c); return FAIL; } + leave(c, P_rule); return OK; } -// pattern <- alt ( '/' sp alt )* -p_pattern(c: *compiler): int { - if !p_alt(c) { +// pattern <- alternative ( '/' !'/' sp alternative )* +p_pattern(c: *peg): int { + enter(c); + + if !p_alternative(c) { + fail(c); return FAIL; } @@ -164,176 +283,192 @@ p_pattern(c: *compiler): int { break; } + choice(c); + if literal(c, "/") { + fail(c); + fail(c); + return FAIL; + } + if !p_sp(c) { break; } - if !p_alt(c) { + if !p_alternative(c) { break; } commit(c); } + leave(c, P_pattern); return OK; } -// preop <- [!&] sp -p_preop(c: *compiler): int { - if !charclass(c, "!&") { - return FAIL; - } +// lookop <- [!&] +p_lookop(c: *peg): int { + enter(c); - if !p_sp(c) { + if !charclass(c, "!&") { + fail(c); return FAIL; } + leave(c, P_lookop); return OK; } -// alt <- ( preop? suffix )+ -p_alt(c: *compiler): int { - choice(c); - if p_preop(c) { - commit(c); - } +// alternative <- lookahead+ +p_alternative(c: *peg): int { + enter(c); - if !p_suffix(c) { + if !p_lookahead(c) { + fail(c); return FAIL; } loop { choice(c); - choice(c); - if p_preop(c) { - commit(c); - } - - - if !p_suffix(c) { + if !p_lookahead(c) { break; } commit(c); } + leave(c, P_alternative); return OK; } -// postop <- [*+?] sp -p_postop(c: *compiler): int { - if !charclass(c, "*+?") { +// lookahead <- (lookop sp)? suffix +p_lookahead(c: *peg): int { + enter(c); + + choice(c); + if p_lookop(c) && p_sp(c) { + commit(c); + } + + if !p_suffix(c) { + fail(c); return FAIL; } - if !p_sp(c) { + leave(c, P_lookahead); + return OK; +} + +// countop <- [*+?] +p_countop(c: *peg): int { + enter(c); + + if !charclass(c, "*+?") { + fail(c); return FAIL; } + leave(c, P_countop); return OK; } -// suffix <- primary postop* -p_suffix(c: *compiler): int { +// suffix <- primary (countop sp)* +p_suffix(c: *peg): int { + enter(c); + if !p_primary(c) { + fail(c); return FAIL; } loop { choice(c); - if !p_postop(c) { + if !p_countop(c) { + break; + } + + if !p_sp(c) { break; } commit(c); } + leave(c, P_suffix); return OK; } -// primary <- group / any / literal / charclass / nonterminal -p_primary(c: *compiler): int { - choice(c); - if p_group(c) { - commit(c); - return OK; - } - - choice(c); - if p_any(c) { - commit(c); - return OK; - } +// primary <- ( '(' sp pattern ')' / any / literal / class / call ) sp +p_primary(c: *peg): int { + enter(c); - choice(c); - if p_literal(c) { - commit(c); - return OK; - } - - choice(c); - if p_charclass(c) { - commit(c); - return OK; - } - - choice(c); - if p_nonterminal(c) { - commit(c); - return OK; - } + loop { + choice(c); + if literal(c, "(") && p_sp(c) && p_pattern(c) && literal(c, ")") { + commit(c); + break; + } - fail(c); - return FAIL; -} + choice(c); + if p_any(c) { + commit(c); + break; + } -// group <- '(' sp pattern ')' sp -p_group(c: *compiler): int { - if !literal(c, "(") { - return FAIL; - } + choice(c); + if p_literal(c) { + commit(c); + break; + } - if !p_sp(c) { - return FAIL; - } + choice(c); + if p_class(c) { + commit(c); + break; + } - if !p_pattern(c) { - return FAIL; - } + choice(c); + if p_call(c) { + commit(c); + break; + } - if !literal(c, ")") { + fail(c); + fail(c); return FAIL; } if !p_sp(c) { + fail(c); return FAIL; } + leave(c, P_primary); return OK; } -// any <- '.' sp -p_any(c: *compiler): int { - if !literal(c, ".") { - return FAIL; - } +// any <- '.' +p_any(c: *peg): int { + enter(c); - if !p_sp(c) { + if !literal(c, ".") { + fail(c); return FAIL; } + leave(c, P_any); return OK; } -// literal <- ['] ( !['] . )* ['] sp -p_literal(c: *compiler): int { - choice(c); +// literal <- ['] ( !['] . )* ['] +p_literal(c: *peg): int { + enter(c); + if !literal(c, "'") { + fail(c); return FAIL; } - commit(c); loop { choice(c); @@ -352,26 +487,23 @@ p_literal(c: *compiler): int { commit(c); } - choice(c); if !literal(c, "'") { - return FAIL; - } - commit(c); - - if !p_sp(c) { + fail(c); return FAIL; } + leave(c, P_literal); return OK; } -// charclass <- '[' ( !']' ( . '-' . / . ) )* ']' sp -p_charclass(c: *compiler): int { - choice(c); +// charclass <- '[' ( !']' ( . '-' . / . ) )* ']' +p_class(c: *peg): int { + enter(c); + if !literal(c, "[") { + fail(c); return FAIL; } - commit(c); loop { choice(c); @@ -388,50 +520,52 @@ p_charclass(c: *compiler): int { } choice(c); - if literal(c, "-") { - if any(c) { - commit(c); - } + if literal(c, "-") && any(c) { + commit(c); } commit(c); } - choice(c); if !literal(c, "]") { - return FAIL; - } - commit(c); - - if !p_sp(c) { + fail(c); return FAIL; } + leave(c, P_class); return OK; } -// nonterminal <- ident !'<-' -p_nonterminal(c: *compiler): int { - if !p_ident(c) { +// call <- identifier !'<-' +p_call(c: *peg): int { + enter(c); + + if !p_identifier(c) { + fail(c); return FAIL; } choice(c); - if !literal(c, "<-") { - return OK; + if p_sp(c) && literal(c, "<-") { + fail(c); + fail(c); + fail(c); + return FAIL; } - fail(c); - fail(c); - return FAIL; + + leave(c, P_call); + return OK; } -// ident <- [a-zA-Z]+ sp -p_ident(c: *compiler): int { +// identifier <- [a-zA-Z0-9_]+ +p_identifier(c: *peg): int { var chars: *byte; + enter(c); - chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; if !charclass(c, chars) { + fail(c); return FAIL; } @@ -445,15 +579,14 @@ p_ident(c: *compiler): int { commit(c); } - if !p_sp(c) { - return FAIL; - } - + leave(c, P_identifier); return OK; } -// sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )* -p_sp(c: *compiler): int { +// sp <- ( [ \t\r\n] / '//' ( ![\r\n] . )* )* +p_sp(c: *peg): int { + enter(c); + loop { choice(c); @@ -465,9 +598,7 @@ p_sp(c: *compiler): int { } choice(c); - if literal(c, "#") { - commit(c); - + if literal(c, "//") { loop { choice(c); @@ -486,25 +617,29 @@ p_sp(c: *compiler): int { } commit(c); + commit(c); continue; } + fail(c); break; } + leave(c, P_sp); return OK; } main(argc: int, argv: **byte, envp: **byte) { var fd: int; var a: alloc; - var c: compiler; + var c: peg; + var i: int; setup_alloc(&a); c.a = &a; c.pos = 0; c.limit = 1024; - c.stack = alloc(c.a, c.limit * sizeof(c.stack[0])):*int; + c.stack = alloc(c.a, c.limit * sizeof(c.stack[0])):*peg_frame; if argc != 2 { die("usage: ./peg <grammar.peg>"); @@ -517,8 +652,36 @@ main(argc: int, argv: **byte, envp: **byte) { c.f = fopen(fd, c.a); + c.src = freadall(c.f, &c.size); + + choice(&c); if !p_grammar(&c) { - die("FAIL"); + die("Syntax error"); + } + commit(&c); + + i = 0; + loop { + if i == c.op { + break; + } + + if c.out[i].nargs == 0 { + fdputs(1, tag_to_str(c.out[i].tag)); + fdputc(1, ' '); + fdputc(1, '"'); + write(1, &c.src[c.out[i].start], c.out[i].end - c.out[i].start); + fdputc(1, '"'); + fdputc(1, '\n'); + } + + if c.out[i].nargs != 0 { + fdputs(1, tag_to_str(c.out[i].tag)); + fdputc(1, ' '); + fdputd(1, c.out[i].nargs); + fdputc(1, '\n'); + } + + i = i + 1; } - fdputs(1, "OK\n"); } diff --git a/peg.peg b/peg.peg @@ -1,15 +1,15 @@ grammar <- sp rule+ !. -rule <- ident '<-' sp pattern -pattern <- alt ( '/' sp alt )* -preop <- [!&] sp -alt <- ( preop? suffix )+ -postop <- [*+?] sp -suffix <- primary postop* -primary <- group / any / literal / charclass / nonterminal -group <- '(' sp pattern ')' sp -any <- '.' sp -literal <- ['] ( !['] . )* ['] sp -charclass <- '[' ( !']' ( . '-' . / . ) )* ']' sp -nonterminal <- ident !'<-' -ident <- [a-zA-Z]+ sp -sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )* +rule <- identifier sp '<-' sp pattern +pattern <- alternative ( '/' !'/' sp alternative )* +alternative <- lookahead+ +lookop <- [!&] +lookahead <- (lookop sp)? suffix +countop <- [*+?] +suffix <- primary (countop sp)* +primary <- ( '(' sp pattern ')' / any / literal / class / call ) sp +any <- '.' +literal <- ['] ( !['] . )* ['] +class <- '[' ( !']' ( . '-' . / . ) )* ']' +call <- identifier !(sp '<-') +identifier <- [a-zA-Z0-9_]+ +sp <- ( [ \t\r\n] / '//' ( ![\r\n] . )* )*