commit 28476526f86abe770d02adb0c6cc86867e363133
parent ef9f830ae2d8d4f17258b50c16c7a75304637077
Author: erai <erai@omiltem.net>
Date: Sat, 14 Sep 2024 18:22:18 -0400
basic peg syntax error location
Diffstat:
M | cc3.c | | | 2 | +- |
M | parsepeg.c | | | 36 | ++++++++++++++++++------------------ |
M | peg.c | | | 40 | ++++++++++++++++++++++++++++++++++------ |
M | peglib.c | | | 83 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
D | pxe.sh | | | 5 | ----- |
M | watch.sh | | | 2 | +- |
6 files changed, 134 insertions(+), 34 deletions(-)
diff --git a/cc3.c b/cc3.c
@@ -18,7 +18,7 @@ main(argc: int, argv: **byte, envp: **byte) {
src = freadall(f, &len);
fclose(f);
- peg = peg_new(src, len, &a);
+ peg = peg_new(argv[1], src, len, &a);
node = peg_parse(peg);
peg_free(peg);
diff --git a/parsepeg.c b/parsepeg.c
@@ -37,7 +37,7 @@ tag_to_str(tag: int): *byte {
p_grammar(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_grammar);
ok = p_sp(c);
if ok {
ok = p_rule(c);
@@ -61,7 +61,7 @@ p_grammar(c: *peg): int {
p_rule(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_rule);
ok = p_identifier(c);
if ok {
ok = p_sp(c);
@@ -81,7 +81,7 @@ p_rule(c: *peg): int {
p_pattern(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_pattern);
ok = p_alternative(c);
if ok {
loop {
@@ -108,7 +108,7 @@ p_pattern(c: *peg): int {
p_alternative(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_alternative);
ok = p_lookahead(c);
if ok {
loop {
@@ -124,7 +124,7 @@ p_alternative(c: *peg): int {
p_lookop(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_lookop);
ok = charset(c, "!&");
if ok { leave(c, P_lookop); } else { fail(c); }
return ok;
@@ -132,7 +132,7 @@ p_lookop(c: *peg): int {
p_lookahead(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_lookahead);
choice(c);
ok = p_lookop(c);
if ok {
@@ -148,7 +148,7 @@ p_lookahead(c: *peg): int {
p_countop(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_countop);
ok = charset(c, "*+?");
if ok { leave(c, P_countop); } else { fail(c); }
return ok;
@@ -156,7 +156,7 @@ p_countop(c: *peg): int {
p_suffix(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_suffix);
ok = p_primary(c);
if ok {
loop {
@@ -175,7 +175,7 @@ p_suffix(c: *peg): int {
p_primary(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_primary);
choice(c);
ok = literal(c, "(");
if ok {
@@ -221,7 +221,7 @@ p_primary(c: *peg): int {
p_any(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_any);
ok = literal(c, ".");
if ok { leave(c, P_any); } else { fail(c); }
return ok;
@@ -229,13 +229,13 @@ p_any(c: *peg): int {
p_literal(c: *peg): int {
var ok: int;
- enter(c);
- ok = charset(c, "'");
+ enter(c, P_literal);
+ ok = literal(c, "'");
if ok {
loop {
choice(c);
choice(c);
- ok = charset(c, "'");
+ ok = literal(c, "'");
if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }
if ok {
ok = any(c);
@@ -245,7 +245,7 @@ p_literal(c: *peg): int {
}
}
if ok {
- ok = charset(c, "'");
+ ok = literal(c, "'");
}
if ok { leave(c, P_literal); } else { fail(c); }
return ok;
@@ -253,7 +253,7 @@ p_literal(c: *peg): int {
p_class(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_class);
ok = literal(c, "[");
if ok {
loop {
@@ -291,7 +291,7 @@ p_class(c: *peg): int {
p_call(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_call);
ok = p_identifier(c);
if ok {
choice(c);
@@ -307,7 +307,7 @@ p_call(c: *peg): int {
p_identifier(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_identifier);
ok = charset(c, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
if ok {
loop {
@@ -323,7 +323,7 @@ p_identifier(c: *peg): int {
p_sp(c: *peg): int {
var ok: int;
- enter(c);
+ enter(c, P_sp);
loop {
choice(c);
choice(c);
diff --git a/peg.c b/peg.c
@@ -77,6 +77,7 @@ translate_literal(c: *compiler, n: *peg_node) {
hex = "0123456789abcdef";
+ fputs(c.out, " ok = literal(c, \"");
i = 1;
len = n.len - 1;
loop {
@@ -97,6 +98,7 @@ translate_literal(c: *compiler, n: *peg_node) {
i = i + 1;
}
+ fputs(c.out, "\");\n");
}
hexdig(c: byte): int {
@@ -166,6 +168,7 @@ translate_charset(c: *compiler, n: *peg_node) {
var a: int;
var b: int;
var hex: *byte;
+ var count: int;
hex = "0123456789abcdef";
@@ -217,6 +220,29 @@ translate_charset(c: *compiler, n: *peg_node) {
}
}
+ count = 0;
+ i = 1;
+ loop {
+ if i == 256 {
+ break;
+ }
+ count = count + c.scratch[i]:int;
+ i = i + 1;
+ }
+
+ if count == 0 {
+ fputs(c.out, " fail(c);\n");
+ fputs(c.out, " ok = 0;\n");
+ return;
+ } else if count >= 255 {
+ fputs(c.out, " ok = any(c);\n");
+ return;
+ } else if count == 1 {
+ fputs(c.out, " ok = literal(c, \"");
+ } else {
+ fputs(c.out, " ok = charset(c, \"");
+ }
+
i = 1;
loop {
if i == 256 {
@@ -236,6 +262,7 @@ translate_charset(c: *compiler, n: *peg_node) {
i = i + 1;
}
+ fputs(c.out, "\");\n");
}
translate_pattern(c: *compiler, n: *peg_node) {
@@ -333,13 +360,9 @@ translate_pattern(c: *compiler, n: *peg_node) {
} else if n.tag == P_any {
fputs(c.out, " ok = any(c);\n");
} else if n.tag == P_literal {
- fputs(c.out, " ok = literal(c, \"");
translate_literal(c, n);
- fputs(c.out, "\");\n");
} else if n.tag == P_class {
- fputs(c.out, " ok = charset(c, \"");
translate_charset(c, n);
- fputs(c.out, "\");\n");
} else if n.tag == P_call {
fputs(c.out, " ok = p_");
fputb(c.out, n.child.str, n.child.len);
@@ -410,7 +433,9 @@ translate(c: *compiler, n: *peg_node) {
fputb(c.out, v.child.str, v.child.len);
fputs(c.out, "(c: *peg): int {\n");
fputs(c.out, " var ok: int;\n");
- fputs(c.out, " enter(c);\n");
+ fputs(c.out, " enter(c, P_");
+ fputb(c.out, v.child.str, v.child.len);
+ fputs(c.out, ");\n");
translate_pattern(c, v.child.next);
fputs(c.out, " if ok { leave(c, P_");
fputb(c.out, v.child.str, v.child.len);
@@ -434,10 +459,12 @@ main(argc: int, argv: **byte, envp: **byte) {
var src: *byte;
var len: int;
var node: *peg_node;
+ var filename: *byte;
setup_alloc(&a);
ifd = 0;
ofd = 1;
+ filename = "-";
i = 1;
loop {
@@ -470,6 +497,7 @@ main(argc: int, argv: **byte, envp: **byte) {
die("too many inputs");
}
+ filename = argv[i];
ifd = open(argv[i], 0, 0);
if ifd < 0 {
die("failed to open input");
@@ -488,7 +516,7 @@ main(argc: int, argv: **byte, envp: **byte) {
out = fopen(ofd, c.a);
c.out = out;
- c.p = peg_new(src, len, c.a);
+ c.p = peg_new(filename, src, len, c.a);
node = peg_parse(c.p);
translate(&c, node);
diff --git a/peglib.c b/peglib.c
@@ -2,6 +2,9 @@ struct peg_frame {
pos: int;
depth: int;
op: int;
+ tag: int;
+ line: int;
+ col: int;
}
struct peg_op {
@@ -14,9 +17,20 @@ struct peg_op {
struct peg {
a: *alloc;
+ filename: *byte;
+
src: *byte;
size: int;
pos: int;
+ line: int;
+ col: int;
+ tag: int;
+
+ fail_depth: int;
+ fail_tag: int;
+ fail_line: int;
+ fail_col: int;
+ fail_literal: *byte;
stack: *peg_frame;
sp: int;
@@ -47,6 +61,9 @@ choice(c: *peg) {
c.stack[c.sp].pos = c.pos;
c.stack[c.sp].depth = c.depth;
c.stack[c.sp].op = c.op;
+ c.stack[c.sp].tag = c.tag;
+ c.stack[c.sp].line = c.line;
+ c.stack[c.sp].col = c.col;
c.sp = c.sp + 1;
}
@@ -61,10 +78,22 @@ fail(c: *peg) {
if c.sp == 0 {
die("fail underflow");
}
+
+ if c.depth > c.fail_depth {
+ c.fail_depth = c.depth;
+ c.fail_tag = c.tag;
+ c.fail_line = c.line;
+ c.fail_col = c.col;
+ c.fail_literal = 0:*byte;
+ }
+
c.sp = c.sp - 1;
c.pos = c.stack[c.sp].pos;
c.depth = c.stack[c.sp].depth;
c.op = c.stack[c.sp].op;
+ c.tag = c.stack[c.sp].tag;
+ c.line = c.stack[c.sp].line;
+ c.col = c.stack[c.sp].col;
}
get(c: *peg): int {
@@ -76,6 +105,16 @@ get(c: *peg): int {
ch = c.src[c.pos]:int;
c.pos = c.pos + 1;
+ c.col = c.col + 1;
+
+ if ch == '\n' {
+ c.col = 1;
+ c.line = c.line + 1;
+ }
+
+ if ch == 0 {
+ die("invalid nul in source");
+ }
return ch;
}
@@ -93,6 +132,7 @@ literal(c: *peg, s: *byte): int {
ch = get(c);
if ch != (s[i]:int) {
fail(c);
+ c.fail_literal = s;
return 0;
}
@@ -102,8 +142,9 @@ literal(c: *peg, s: *byte): int {
return 1;
}
-enter(c: *peg) {
+enter(c: *peg, tag: int) {
choice(c);
+ c.tag = tag;
}
leave(c: *peg, tag: int) {
@@ -113,6 +154,7 @@ leave(c: *peg, tag: int) {
var tmp: *byte;
commit(c);
+ c.fail_depth = 0;
nargs = c.depth - c.stack[c.sp].depth;
start = c.stack[c.sp].pos;
@@ -226,16 +268,28 @@ construct(c: *peg): *peg_node {
}
}
-peg_new(src: *byte, len: int, a: *alloc): *peg {
+peg_new(filename: *byte, src: *byte, len: int, a: *alloc): *peg {
var c: *peg;
c = alloc(a, sizeof(*c)):*peg;
c.a = a;
+ c.filename = filename;
+
c.src = src;
c.size = len;
c.pos = 0;
+ c.tag = 0;
+ c.line = 1;
+ c.col = 1;
+ c.tag = 0;
+
+ c.fail_depth = 0;
+ c.fail_tag = 0;
+ c.fail_line = 0;
+ c.fail_col = 0;
+ c.fail_literal = 0:*byte;
c.limit = 1024;
c.stack = alloc(a, c.limit * sizeof(*c.stack)):*peg_frame;
@@ -256,7 +310,21 @@ peg_new(src: *byte, len: int, a: *alloc): *peg {
peg_parse(c: *peg): *peg_node {
choice(c);
if !p_grammar(c) {
- die("syntax error");
+ fdputs(2, "syntax error at ");
+ fdputs(2, c.filename);
+ fdputs(2, ":");
+ fdputd(2, c.fail_line);
+ fdputs(2, ":");
+ fdputd(2, c.fail_col);
+ fdputs(2, " expected ");
+ fdputs(2, tag_to_str(c.fail_tag));
+ if c.fail_literal {
+ fdputs(2, " '");
+ fdputs(2, c.fail_literal);
+ fdputs(2, "'");
+ }
+ fdputs(2, "\n");
+ exit(1);
}
commit(c);
return construct(c);
@@ -266,6 +334,15 @@ peg_reset(c: *peg, src: *byte, len: int) {
c.src = src;
c.size = len;
c.pos = 0;
+ c.tag = 0;
+ c.line = 1;
+ c.col = 1;
+ c.tag = 0;
+ c.fail_depth = 0;
+ c.fail_tag = 0;
+ c.fail_line = 0;
+ c.fail_col = 0;
+ c.fail_literal = 0:*byte;
c.depth = 0;
c.sp = 0;
c.op = 0;
diff --git a/pxe.sh b/pxe.sh
@@ -1,5 +0,0 @@
-#!/bin/sh
-./bootstrap.sh || exit 1
-nasm -f bin pxe.asm || exit 1
-cp kernel pxe /srv/tftp/ || exit 1
-printf r > /dev/ttyACM0 || exit 1
diff --git a/watch.sh b/watch.sh
@@ -1,6 +1,6 @@
#!/bin/sh
clear
-./pxe.sh
+sh -e ./build.sh
echo status $?
: < ~/.post
exec "$0"