os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 28476526f86abe770d02adb0c6cc86867e363133
parent ef9f830ae2d8d4f17258b50c16c7a75304637077
Author: erai <erai@omiltem.net>
Date:   Sat, 14 Sep 2024 18:22:18 -0400

basic peg syntax error location

Diffstat:
M cc3.c | 2 +-
M parsepeg.c | 36 ++++++++++++++++++------------------
M peg.c | 40 ++++++++++++++++++++++++++++++++++------
M peglib.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
D pxe.sh | 5 -----
M watch.sh | 2 +-
6 files changed, 134 insertions(+), 34 deletions(-)

diff --git a/cc3.c b/cc3.c @@ -18,7 +18,7 @@ main(argc: int, argv: **byte, envp: **byte) { src = freadall(f, &len); fclose(f); - peg = peg_new(src, len, &a); + peg = peg_new(argv[1], src, len, &a); node = peg_parse(peg); peg_free(peg); diff --git a/parsepeg.c b/parsepeg.c @@ -37,7 +37,7 @@ tag_to_str(tag: int): *byte { p_grammar(c: *peg): int { var ok: int; - enter(c); + enter(c, P_grammar); ok = p_sp(c); if ok { ok = p_rule(c); @@ -61,7 +61,7 @@ p_grammar(c: *peg): int { p_rule(c: *peg): int { var ok: int; - enter(c); + enter(c, P_rule); ok = p_identifier(c); if ok { ok = p_sp(c); @@ -81,7 +81,7 @@ p_rule(c: *peg): int { p_pattern(c: *peg): int { var ok: int; - enter(c); + enter(c, P_pattern); ok = p_alternative(c); if ok { loop { @@ -108,7 +108,7 @@ p_pattern(c: *peg): int { p_alternative(c: *peg): int { var ok: int; - enter(c); + enter(c, P_alternative); ok = p_lookahead(c); if ok { loop { @@ -124,7 +124,7 @@ p_alternative(c: *peg): int { p_lookop(c: *peg): int { var ok: int; - enter(c); + enter(c, P_lookop); ok = charset(c, "!&"); if ok { leave(c, P_lookop); } else { fail(c); } return ok; @@ -132,7 +132,7 @@ p_lookop(c: *peg): int { p_lookahead(c: *peg): int { var ok: int; - enter(c); + enter(c, P_lookahead); choice(c); ok = p_lookop(c); if ok { @@ -148,7 +148,7 @@ p_lookahead(c: *peg): int { p_countop(c: *peg): int { var ok: int; - enter(c); + enter(c, P_countop); ok = charset(c, "*+?"); if ok { leave(c, P_countop); } else { fail(c); } return ok; @@ -156,7 +156,7 @@ p_countop(c: *peg): int { p_suffix(c: *peg): int { var ok: int; - enter(c); + enter(c, P_suffix); ok = p_primary(c); if ok { loop { @@ -175,7 +175,7 @@ p_suffix(c: *peg): int { p_primary(c: *peg): int { var ok: int; - enter(c); + enter(c, P_primary); choice(c); ok = literal(c, "("); if ok { @@ -221,7 +221,7 @@ p_primary(c: *peg): int { p_any(c: *peg): int { var ok: int; - enter(c); + enter(c, P_any); ok = literal(c, "."); if ok { leave(c, P_any); } else { fail(c); } return ok; @@ -229,13 +229,13 @@ 
p_any(c: *peg): int { p_literal(c: *peg): int { var ok: int; - enter(c); - ok = charset(c, "'"); + enter(c, P_literal); + ok = literal(c, "'"); if ok { loop { choice(c); choice(c); - ok = charset(c, "'"); + ok = literal(c, "'"); if ok { fail(c); fail(c); ok = 0; } else { ok = 1; } if ok { ok = any(c); @@ -245,7 +245,7 @@ p_literal(c: *peg): int { } } if ok { - ok = charset(c, "'"); + ok = literal(c, "'"); } if ok { leave(c, P_literal); } else { fail(c); } return ok; @@ -253,7 +253,7 @@ p_literal(c: *peg): int { p_class(c: *peg): int { var ok: int; - enter(c); + enter(c, P_class); ok = literal(c, "["); if ok { loop { @@ -291,7 +291,7 @@ p_class(c: *peg): int { p_call(c: *peg): int { var ok: int; - enter(c); + enter(c, P_call); ok = p_identifier(c); if ok { choice(c); @@ -307,7 +307,7 @@ p_call(c: *peg): int { p_identifier(c: *peg): int { var ok: int; - enter(c); + enter(c, P_identifier); ok = charset(c, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"); if ok { loop { @@ -323,7 +323,7 @@ p_identifier(c: *peg): int { p_sp(c: *peg): int { var ok: int; - enter(c); + enter(c, P_sp); loop { choice(c); choice(c); diff --git a/peg.c b/peg.c @@ -77,6 +77,7 @@ translate_literal(c: *compiler, n: *peg_node) { hex = "0123456789abcdef"; + fputs(c.out, " ok = literal(c, \""); i = 1; len = n.len - 1; loop { @@ -97,6 +98,7 @@ translate_literal(c: *compiler, n: *peg_node) { i = i + 1; } + fputs(c.out, "\");\n"); } hexdig(c: byte): int { @@ -166,6 +168,7 @@ translate_charset(c: *compiler, n: *peg_node) { var a: int; var b: int; var hex: *byte; + var count: int; hex = "0123456789abcdef"; @@ -217,6 +220,29 @@ translate_charset(c: *compiler, n: *peg_node) { } } + count = 0; + i = 1; + loop { + if i == 256 { + break; + } + count = count + c.scratch[i]:int; + i = i + 1; + } + + if count == 0 { + fputs(c.out, " fail(c);\n"); + fputs(c.out, " ok = 0;\n"); + return; + } else if count >= 255 { + fputs(c.out, " ok = any(c);\n"); + return; + } else if count == 1 { + 
fputs(c.out, " ok = literal(c, \""); + } else { + fputs(c.out, " ok = charset(c, \""); + } + i = 1; loop { if i == 256 { @@ -236,6 +262,7 @@ translate_charset(c: *compiler, n: *peg_node) { i = i + 1; } + fputs(c.out, "\");\n"); } translate_pattern(c: *compiler, n: *peg_node) { @@ -333,13 +360,9 @@ translate_pattern(c: *compiler, n: *peg_node) { } else if n.tag == P_any { fputs(c.out, " ok = any(c);\n"); } else if n.tag == P_literal { - fputs(c.out, " ok = literal(c, \""); translate_literal(c, n); - fputs(c.out, "\");\n"); } else if n.tag == P_class { - fputs(c.out, " ok = charset(c, \""); translate_charset(c, n); - fputs(c.out, "\");\n"); } else if n.tag == P_call { fputs(c.out, " ok = p_"); fputb(c.out, n.child.str, n.child.len); @@ -410,7 +433,9 @@ translate(c: *compiler, n: *peg_node) { fputb(c.out, v.child.str, v.child.len); fputs(c.out, "(c: *peg): int {\n"); fputs(c.out, " var ok: int;\n"); - fputs(c.out, " enter(c);\n"); + fputs(c.out, " enter(c, P_"); + fputb(c.out, v.child.str, v.child.len); + fputs(c.out, ");\n"); translate_pattern(c, v.child.next); fputs(c.out, " if ok { leave(c, P_"); fputb(c.out, v.child.str, v.child.len); @@ -434,10 +459,12 @@ main(argc: int, argv: **byte, envp: **byte) { var src: *byte; var len: int; var node: *peg_node; + var filename: *byte; setup_alloc(&a); ifd = 0; ofd = 1; + filename = "-"; i = 1; loop { @@ -470,6 +497,7 @@ main(argc: int, argv: **byte, envp: **byte) { die("too many inputs"); } + filename = argv[i]; ifd = open(argv[i], 0, 0); if ifd < 0 { die("failed to open input"); @@ -488,7 +516,7 @@ main(argc: int, argv: **byte, envp: **byte) { out = fopen(ofd, c.a); c.out = out; - c.p = peg_new(src, len, c.a); + c.p = peg_new(filename, src, len, c.a); node = peg_parse(c.p); translate(&c, node); diff --git a/peglib.c b/peglib.c @@ -2,6 +2,9 @@ struct peg_frame { pos: int; depth: int; op: int; + tag: int; + line: int; + col: int; } struct peg_op { @@ -14,9 +17,20 @@ struct peg_op { struct peg { a: *alloc; + filename: *byte; + 
src: *byte; size: int; pos: int; + line: int; + col: int; + tag: int; + + fail_depth: int; + fail_tag: int; + fail_line: int; + fail_col: int; + fail_literal: *byte; stack: *peg_frame; sp: int; @@ -47,6 +61,9 @@ choice(c: *peg) { c.stack[c.sp].pos = c.pos; c.stack[c.sp].depth = c.depth; c.stack[c.sp].op = c.op; + c.stack[c.sp].tag = c.tag; + c.stack[c.sp].line = c.line; + c.stack[c.sp].col = c.col; c.sp = c.sp + 1; } @@ -61,10 +78,22 @@ fail(c: *peg) { if c.sp == 0 { die("fail underflow"); } + + if c.depth > c.fail_depth { + c.fail_depth = c.depth; + c.fail_tag = c.tag; + c.fail_line = c.line; + c.fail_col = c.col; + c.fail_literal = 0:*byte; + } + c.sp = c.sp - 1; c.pos = c.stack[c.sp].pos; c.depth = c.stack[c.sp].depth; c.op = c.stack[c.sp].op; + c.tag = c.stack[c.sp].tag; + c.line = c.stack[c.sp].line; + c.col = c.stack[c.sp].col; } get(c: *peg): int { @@ -76,6 +105,16 @@ get(c: *peg): int { ch = c.src[c.pos]:int; c.pos = c.pos + 1; + c.col = c.col + 1; + + if ch == '\n' { + c.col = 1; + c.line = c.line + 1; + } + + if ch == 0 { + die("invalid nul in source"); + } return ch; } @@ -93,6 +132,7 @@ literal(c: *peg, s: *byte): int { ch = get(c); if ch != (s[i]:int) { fail(c); + c.fail_literal = s; return 0; } @@ -102,8 +142,9 @@ literal(c: *peg, s: *byte): int { return 1; } -enter(c: *peg) { +enter(c: *peg, tag: int) { choice(c); + c.tag = tag; } leave(c: *peg, tag: int) { @@ -113,6 +154,7 @@ leave(c: *peg, tag: int) { var tmp: *byte; commit(c); + c.fail_depth = 0; nargs = c.depth - c.stack[c.sp].depth; start = c.stack[c.sp].pos; @@ -226,16 +268,28 @@ construct(c: *peg): *peg_node { } } -peg_new(src: *byte, len: int, a: *alloc): *peg { +peg_new(filename: *byte, src: *byte, len: int, a: *alloc): *peg { var c: *peg; c = alloc(a, sizeof(*c)):*peg; c.a = a; + c.filename = filename; + c.src = src; c.size = len; c.pos = 0; + c.tag = 0; + c.line = 1; + c.col = 1; + c.tag = 0; + + c.fail_depth = 0; + c.fail_tag = 0; + c.fail_line = 0; + c.fail_col = 0; + c.fail_literal = 
0:*byte; c.limit = 1024; c.stack = alloc(a, c.limit * sizeof(*c.stack)):*peg_frame; @@ -256,7 +310,21 @@ peg_new(src: *byte, len: int, a: *alloc): *peg { peg_parse(c: *peg): *peg_node { choice(c); if !p_grammar(c) { - die("syntax error"); + fdputs(2, "syntax error at "); + fdputs(2, c.filename); + fdputs(2, ":"); + fdputd(2, c.fail_line); + fdputs(2, ":"); + fdputd(2, c.fail_col); + fdputs(2, " expected "); + fdputs(2, tag_to_str(c.fail_tag)); + if c.fail_literal { + fdputs(2, " '"); + fdputs(2, c.fail_literal); + fdputs(2, "'"); + } + fdputs(2, "\n"); + exit(1); } commit(c); return construct(c); @@ -266,6 +334,15 @@ peg_reset(c: *peg, src: *byte, len: int) { c.src = src; c.size = len; c.pos = 0; + c.tag = 0; + c.line = 1; + c.col = 1; + c.tag = 0; + c.fail_depth = 0; + c.fail_tag = 0; + c.fail_line = 0; + c.fail_col = 0; + c.fail_literal = 0:*byte; c.depth = 0; c.sp = 0; c.op = 0; diff --git a/pxe.sh b/pxe.sh @@ -1,5 +0,0 @@ -#!/bin/sh -./bootstrap.sh || exit 1 -nasm -f bin pxe.asm || exit 1 -cp kernel pxe /srv/tftp/ || exit 1 -printf r > /dev/ttyACM0 || exit 1 diff --git a/watch.sh b/watch.sh @@ -1,6 +1,6 @@ #!/bin/sh clear -./pxe.sh +sh -e ./build.sh echo status $? : < ~/.post exec "$0"