os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit f41e941fcb1c8874dfb9a0563ea12d93df22c56e
parent 4b035e71c821cbbc8b39676aa1d3707f934e3d4d
Author: erai <erai@omiltem.net>
Date:   Wed,  5 Feb 2025 23:32:25 +0000

Generate c code from ir

Diffstat:
Mbootstrap.sh | 2+-
Mcc1.om | 8+++-----
Mdecl.om | 1+
Mir.om | 55+++++++++++++++++++++++++++++++------------------------
Aircout.om | 531+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 567 insertions(+), 30 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh
@@ -2,7 +2,7 @@
 
 BOOTSTRAP="cc0.c"
 LIBS="bufio.om lib.om alloc.om syscall.om"
-SOURCES="cc1.om type.om parse2.om parse3.om peglib.om as.om decl.om node.om cout.om peg.om parsepeg.om ir.om"
+SOURCES="cc1.om type.om parse2.om parse3.om peglib.om as.om decl.om node.om peg.om parsepeg.om ir.om ircout.om"
 
 # Build the bootstrap compiler from c
 gcc -std=c99 ${BOOTSTRAP} -o cc0
diff --git a/cc1.om b/cc1.om
@@ -98,10 +98,6 @@ func open_coutput(c: *compiler, filename: *byte) {
 	c.cout = fopen(fd, c.a);
 }
 
-func flush_coutput(c: *compiler) {
-	fflush(c.cout);
-}
-
 func compile(c: *compiler, p: *node) {
 	var n: *node;
 	var d: *decl;
@@ -504,6 +500,7 @@ func defstruct(c: *compiler, n: *node) {
 
 	d.struct_defined = 1;
 	d.struct_def = n;
+	d.struct_type = prototype(c, n.a);
 }
 
 func defunion(c: *compiler, n: *node) {
@@ -528,6 +525,7 @@ func defunion(c: *compiler, n: *node) {
 
 	d.struct_defined = 1;
 	d.struct_def = n;
+	d.struct_type = prototype(c, n.a);
 }
 
 func defenum(c: *compiler, n: *node) {
@@ -2332,7 +2330,7 @@ func main(argc: int, argv: **byte, envp: **byte) {
 
 	if c.do_cout {
 		open_coutput(c, filename);
-		ctranslate(c);
+		ircout(c);
 		return;
 	}
 
diff --git a/decl.om b/decl.om
@@ -22,6 +22,7 @@ struct decl {
 	struct_size: int;
 	struct_layout_done: int;
 	struct_def: *node;
+	struct_type: *type;
 
 	member_defined: int;
 	member_type: *type;
diff --git a/ir.om b/ir.om
@@ -1,6 +1,7 @@
 enum {
 	// Places
 	IOP_VAR,
+	IOP_VARREF,
 	IOP_FUNC,
 	IOP_CONST,
 	IOP_STR,
@@ -8,7 +9,6 @@ enum {
 	// Memory operations
 	IOP_LOAD,
 	IOP_STORE,
-	IOP_REF,
 
 	// Calling convention
 	IOP_RETVAL,
@@ -61,6 +61,7 @@ struct irop {
 
 // A basic block is a sequence of expressions that end with a branch
 struct irblock {
+	n: int;
 	ops: **irop;
 	ops_len: int;
 	ops_cap: int;
@@ -90,7 +91,6 @@ struct irvar {
 	right: *irvar;
 	t: *type;
 	n: int;
-	reify: int;
 	offset: int;
 	mark: int;
 }
@@ -139,6 +139,7 @@ func mkirblock(ic: *irfunc): *irblock {
 
 	b = alloc(ic.a, sizeof(*b)) as *irblock;
 
+	b.n = ic.blocks_len;
 	b.label = mklabel(ic.s);
 	b.ops = nil;
 	b.ops_len = 0;
@@ -505,7 +506,14 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop {
 			v = find(ic.c, n.a.t.val.st.name, n.b.s, 0);
 		} else {
 			a = expr_to_ir(ic, n.a);
-			b = mkirop(ic, IOP_REF, a, nil);
+			if a.kind == IOP_VAR {
+				b = mkirop(ic, IOP_VARREF, nil, nil);
+				b.n = a.n;
+			} else if a.kind == IOP_LOAD {
+				b = a.a;
+			} else {
+				die("invalid ref");
+			}
 			v = find(ic.c, n.a.t.st.name, n.b.s, 0);
 		}
 		c = mkirconst(ic, v.member_offset);
@@ -515,8 +523,16 @@ func expr_to_ir(ic: *irfunc, n: *node): *irop {
 		return o;
 	} else if kind == N_REF {
 		a = expr_to_ir(ic, n.a);
-		o = mkirop(ic, IOP_REF, a, nil);
-		return o;
+		if a.kind == IOP_VAR {
+			o = mkirop(ic, IOP_VARREF, nil, nil);
+			o.n = a.n;
+			return o;
+		} else if a.kind == IOP_LOAD {
+			return a.a;
+		} else {
+			die("invalid ref");
+			return nil;
+		}
 	} else if kind == N_DEREF {
 		a = expr_to_ir(ic, n.a);
 		o = mkirop(ic, IOP_LOAD, a, nil);
@@ -1154,6 +1170,10 @@ func irshow4(out: *file, o: *irop) {
 		fputs(out, "(var ");
 		fputd(out, o.n);
 		fputs(out, ")");
+	} else if kind == IOP_VARREF {
+		fputs(out, "(varref ");
+		fputd(out, o.n);
+		fputs(out, ")");
 	} else if kind == IOP_FUNC {
 		fputs(out, "(func ");
 		fputs(out, o.s);
@@ -1174,10 +1194,6 @@ func irshow4(out: *file, o: *irop) {
 		fputs(out, " ");
 		irshow4(out, o.b);
 		fputs(out, ")");
-	} else if kind == IOP_REF {
-		fputs(out, "(ref ");
-		irshow4(out, o.a);
-		fputs(out, ")");
 	} else if kind == IOP_RETVAL {
 		fputs(out, "(retval ");
 		irshow4(out, o.a);
@@ -1924,6 +1940,8 @@ func output_irexpr(ic: *irfunc, b: *irblock, o: *irop) {
 	kind = o.kind;
 	if kind == IOP_VAR {
 		as_modrm(ic.s, OP_LOAD, R_RAX, R_RBP, 0, 0, ic.vars[o.n].offset);
+	} else if kind == IOP_VARREF {
+		as_modrm(ic.s, OP_LEA, R_RAX, R_RBP, 0, 0, ic.vars[o.n].offset);
 	} else if kind == IOP_FUNC {
 		output_irfuncptr(ic, b, o);
 	} else if kind == IOP_CONST {
@@ -1947,15 +1965,6 @@ func output_irexpr(ic: *irfunc, b: *irblock, o: *irop) {
 		} else {
 			cdie(ic.c, "invalid load");
 		}
-	} else if kind == IOP_REF {
-		kind = o.a.kind;
-		if kind == IOP_LOAD {
-			output_irexpr(ic, b, o.a.a);
-		} else if kind == IOP_VAR {
-			as_modrm(ic.s, OP_LEA, R_RAX, R_RBP, 0, 0, ic.vars[o.a.n].offset);
-		} else {
-			cdie(ic.c, "invalid ref");
-		}
 	} else if kind == IOP_NEG {
 		output_irexpr(ic, b, o.a);
 		as_modr(ic.s, OP_NEGM, R_RAX);
@@ -2162,15 +2171,13 @@ func irexpr_fold(ic: *irfunc, o: *irop): *irop {
 
 	kind = o.kind;
 
-	// Eliminate *& and &* redundant operations
-	if (kind == IOP_REF && a.kind == IOP_LOAD) || (kind == IOP_LOAD && a.kind == IOP_REF) {
-		// This could be a type pun, so make a new op with a new type
-		ret = mkirop(ic, a.a.kind, a.a.a, a.a.b);
+	// Eliminate *& redundant operations
+	if (kind == IOP_LOAD && a.kind == IOP_VARREF) {
+		ret = mkirop(ic, IOP_VAR, nil, nil);
+		ret.n = a.n; // variable index is carried by the VARREF operand, not by the LOAD
 		ret.filename = o.filename;
 		ret.lineno = o.lineno;
 		ret.colno = o.colno;
-		ret.t = o.t;
-		ret.n = a.a.n;
 		return ret;
 	}
 
diff --git a/ircout.om b/ircout.om
@@ -0,0 +1,531 @@
+func ircout(c: *compiler) { // write the program as C to c.cout: prototypes, entry shims, then definitions
+	var d: *decl;
+
+	// Forward declare used functions
+	d = first_decl(c);
+	loop {
+		if !d {
+			break;
+		}
+
+		if d.func_used && d.func_defined {
+			ircforward(c, d);
+		}
+
+		d = next_decl(c, d);
+	}
+
+	// Real entry point to translated entry point
+	fputs(c.cout, "int main(int argc, char **argv) {\n");
+	fputs(c.cout, "\tmy__start((unsigned long)argc, (unsigned long)argv, 0);\n");
+	fputs(c.cout, "\treturn 0;\n");
+	fputs(c.cout, "}\n");
+	fputs(c.cout, "long syscall(long nr, ...);\n");
+	fputs(c.cout, "unsigned long my_syscall(unsigned long nr, unsigned long a, unsigned long b, unsigned long c, unsigned long d, unsigned long e, unsigned long f) {\n");
+	fputs(c.cout, "\treturn (unsigned long)syscall((long)nr, (long)a, (long)b, (long)c, (long)d, (long)e, (long)f);\n");
+	fputs(c.cout, "}\n");
+
+	// Define used functions
+	d = first_decl(c);
+	loop {
+		if !d {
+			break;
+		}
+
+		if d.func_used && d.func_defined {
+			ircdefine(c, d);
+		}
+
+		d = next_decl(c, d);
+	}
+
+	fflush(c.cout);
+}
+
+func ircdecl(c: *compiler, ic: *irfunc) { // emit "unsigned long my_<name>(unsigned long v_<arg>, ...)" with no ';' or body
+	var i: int;
+	var iv: *irvar;
+
+	fputs(c.cout, "unsigned long my_");
+	fputs(c.cout, ic.name);
+	fputs(c.cout, "(");
+
+	i = 0;
+	loop {
+		if i == ic.arg_count {
+			break;
+		}
+
+		iv = ic.vars[i];
+
+		fputs(c.cout, "unsigned long v_");
+		fputs(c.cout, iv.name);
+
+		i = i + 1;
+
+		if i != ic.arg_count {
+			fputs(c.cout, ", ");
+		}
+	}
+
+	fputs(c.cout, ")");
+}
+
+func ircforward(c: *compiler, d: *decl) { // emit a prototype for d; nothing when d has no func_ir
+	var ic: *irfunc;
+
+	ic = d.func_ir;
+	if !ic {
+		return;
+	}
+
+	ircdecl(c, ic);
+	fputs(c.cout, ";\n");
+}
+
+func ircdefine(c: *compiler, d: *decl) { // emit the C definition of d: zeroed locals, then blocks reachable from blocks[0]
+	var i: int;
+	var ic: *irfunc;
+	var iv: *irvar;
+	var top: *irblock;
+	var size: int;
+	var id: int;
+
+	ic = d.func_ir;
+	if !ic {
+		return;
+	}
+
+	ircdecl(c, ic);
+	fputs(c.cout, " {\n");
+
+	i = ic.arg_count; // vars below arg_count are the parameters already declared by ircdecl
+	loop {
+		if i == ic.vars_len {
+			break;
+		}
+
+		iv = ic.vars[i];
+
+		fputs(c.cout, "\tunsigned long ");
+
+		if iv.name {
+			fputs(c.cout, "v_");
+			fputs(c.cout, iv.name);
+		} else {
+			fputs(c.cout, "v");
+			fputd(c.cout, i);
+		}
+
+		if iv.t && iv.t.kind != TY_VOID {
+			size = type_sizeof(c, iv.t);
+		} else {
+			size = sizeof(i);
+		}
+
+		if size == sizeof(i) {
+			fputs(c.cout, " = 0");
+		} else {
+			fputs(c.cout, "[");
+			fputd(c.cout, (size + 7) / 8); // size rounded up to 8-byte units
+			fputs(c.cout, "] = {0}");
+		}
+
+		fputs(c.cout, ";\n");
+
+		i = i + 1;
+	}
+
+	top = ic.blocks[0];
+	ircbody(c, ic, top);
+
+	fputs(c.cout, "}\n");
+}
+
+func ircbody(c: *compiler, ic: *irfunc, ib: *irblock) { // emit ib, then its out and alt successors, each block once (mark = visited)
+	if !ib || ib.mark {
+		return;
+	}
+
+	ib.mark = 1;
+
+	ircblock(c, ic, ib);
+
+	if ib.out {
+		ircbody(c, ic, ib.out);
+	}
+
+	if ib.alt {
+		ircbody(c, ic, ib.alt);
+	}
+}
+
+func ircblock(c: *compiler, ic: *irfunc, ib: *irblock) { // emit label b<n>, one statement per op, then the terminator
+	var i: int;
+	var op: *irop;
+	var kind: int;
+
+	fputs(c.cout, "b");
+	fputd(c.cout, ib.n);
+	fputs(c.cout, ":\n");
+	i = 0;
+	loop {
+		if i + 1 == ib.ops_len { // stop before the last op; it is handled as the terminator below
+			break;
+		}
+
+		op = ib.ops[i];
+
+		kind = op.kind;
+		if kind == IOP_ARG || kind == IOP_RETVAL { // consumed by ircargs / ircretval when the call is emitted
+			i = i + 1;
+			continue;
+		}
+
+		ircline(c, op);
+
+		fputs(c.cout, "\t");
+		ircop(c, ic, ib, op);
+		fputs(c.cout, ";\n");
+
+		i = i + 1;
+	}
+
+	op = ib.ops[i];
+
+	kind = op.kind;
+	if kind == IOP_CALL {
+		irccall(c, ic, ib, op);
+	} else if kind == IOP_JUMP {
+		ircline(c, op);
+		fputs(c.cout, "\tgoto b");
+		fputd(c.cout, ib.out.n);
+		fputs(c.cout, ";\n");
+	} else if kind == IOP_BRANCH {
+		ircline(c, op);
+		fputs(c.cout, "\tif (!(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) {\n");
+
+		ircline(c, op);
+		fputs(c.cout, "\t\tgoto b");
+		fputd(c.cout, ib.alt.n);
+		fputs(c.cout, ";\n");
+
+		ircline(c, op);
+		fputs(c.cout, "\t}\n");
+
+		if ib.out.mark { // out was already emitted, so it will not be the next block in the output
+			ircline(c, op);
+			fputs(c.cout, "\tgoto b");
+			fputd(c.cout, ib.out.n);
+			fputs(c.cout, ";\n");
+		}
+	} else if kind == IOP_RETURN {
+		ircline(c, op);
+		fputs(c.cout, "\treturn ");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ";\n");
+	} else {
+		die("invalid op");
+	}
+}
+
+func ircline(c: *compiler, op: *irop) { // emit a #line directive for op; skipped when op has no filename
+	if !op.filename {
+		return;
+	}
+
+	fputs(c.cout, "#line ");
+	fputd(c.cout, op.lineno);
+	fputs(c.cout, " ");
+	ircstr(c, op.filename, strlen(op.filename));
+	fputs(c.cout, "\n");
+}
+
+func ircstr(c: *compiler, s: *byte, n: int) { // emit n bytes of s as a quoted C string literal
+	var i: int;
+	var ch: int;
+
+	i = 0;
+	fputs(c.cout, "\"");
+	loop {
+		if i == n {
+			break;
+		}
+
+		ch = s[i] as int;
+
+		if ch < 32 || ch > 127 || ch == '\\' || ch == '"' {
+			fputc(c.cout, '\\'); // backslash followed by three octal digits, high to low
+			fputc(c.cout, '0' + ((ch >> 6) & 7));
+			fputc(c.cout, '0' + ((ch >> 3) & 7));
+			fputc(c.cout, '0' + (ch & 7));
+		} else {
+			fputc(c.cout, ch);
+		}
+
+		i = i + 1;
+	}
+	fputs(c.cout, "\"");
+}
+
+func ircop(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { // emit op as a C expression, recursing into operands
+	var kind: int;
+	var iv: *irvar;
+	var size: int;
+
+	kind = op.kind;
+	if kind == IOP_VAR || kind == IOP_VARREF {
+		iv = ic.vars[op.n];
+
+		if iv.t && iv.t.kind != TY_VOID {
+			size = type_sizeof(c, iv.t);
+		} else {
+			size = sizeof(size);
+		}
+
+		if size == sizeof(size) {
+			if kind == IOP_VARREF {
+				fputs(c.cout, "(unsigned long)&");
+			}
+		} else { // any other size was declared as an array by ircdefine
+			if kind == IOP_VARREF {
+				fputs(c.cout, "(unsigned long)");
+			} else {
+				fputs(c.cout, "*");
+			}
+		}
+
+		if iv.name {
+			fputs(c.cout, "v_");
+			fputs(c.cout, iv.name);
+		} else {
+			fputs(c.cout, "v");
+			fputd(c.cout, op.n);
+		}
+	} else if kind == IOP_FUNC {
+		fputs(c.cout, "(unsigned long)&my_");
+		fputs(c.cout, op.s);
+	} else if kind == IOP_CONST {
+		fputd(c.cout, op.n);
+		fputs(c.cout, "UL");
+	} else if kind == IOP_STR {
+		fputs(c.cout, "(unsigned long)");
+		ircstr(c, op.s, op.slen);
+	} else if kind == IOP_LOAD {
+		if op.t.kind == TY_BYTE {
+			fputs(c.cout, "(unsigned long)*(unsigned char *)(");
+			ircop(c, ic, ib, op.a);
+			fputs(c.cout, ")");
+		} else {
+			fputs(c.cout, "*(unsigned long *)(");
+			ircop(c, ic, ib, op.a);
+			fputs(c.cout, ")");
+		}
+	} else if kind == IOP_STORE {
+		if op.a.kind == IOP_VAR {
+			ircop(c, ic, ib, op.a);
+			fputs(c.cout, " = ");
+			ircop(c, ic, ib, op.b);
+		} else if op.a.kind == IOP_LOAD {
+			if op.t.kind == TY_BYTE {
+				fputs(c.cout, "*(unsigned char *)(");
+			} else {
+				fputs(c.cout, "*(unsigned long *)(");
+			}
+			ircop(c, ic, ib, op.a.a);
+			fputs(c.cout, ") = ");
+			ircop(c, ic, ib, op.b);
+		} else {
+			die("invalid store");
+		}
+	} else if kind == IOP_NEG {
+		fputs(c.cout, "-(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")");
+	} else if kind == IOP_NOT {
+		fputs(c.cout, "~(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")");
+	} else if kind == IOP_ADD {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") + (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_AND {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") & (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_OR {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") | (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_XOR {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") ^ (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_DIV {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) / ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_MOD {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) % ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_LSH {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") << (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_RSH {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") >> (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_MUL {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") * (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_SUB {
+		fputs(c.cout, "(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ") - (");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")");
+	} else if kind == IOP_EQ {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) == ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_NE {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) != ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_GT {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) > ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_GE {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) >= ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_LT {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) < ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else if kind == IOP_LE {
+		fputs(c.cout, "(unsigned long)(((long)(");
+		ircop(c, ic, ib, op.a);
+		fputs(c.cout, ")) <= ((long)(");
+		ircop(c, ic, ib, op.b);
+		fputs(c.cout, ")))");
+	} else {
+		die("invalid op");
+	}
+}
+
+func irccall(c: *compiler, ic: *irfunc, ib: *irblock, op: *irop) { // emit a call terminator: [retval = ] callee(args);
+	ircline(c, op);
+
+	fputs(c.cout, "\t");
+	ircretval(c, ic, ib);
+	fputs(c.cout, "((unsigned long (*)())(");
+	ircop(c, ic, ib, op.a);
+	fputs(c.cout, "))(");
+	ircargs(c, ic, ib, op.n);
+	fputs(c.cout, ");\n");
+
+	if ib.out.mark { // out was already emitted, so it will not be the next block in the output
+		ircline(c, op);
+		fputs(c.cout, "\tgoto b");
+		fputd(c.cout, ib.out.n);
+		fputs(c.cout, ";\n");
+	}
+}
+
+func ircretval(c: *compiler, ic: *irfunc, ib: *irblock) { // emit "<target> = " for the first IOP_RETVAL in ib, if any
+	var i: int;
+	var op: *irop;
+	var kind: int;
+
+	i = 0;
+	loop {
+		if i == ib.ops_len {
+			break;
+		}
+
+		op = ib.ops[i];
+		if op.kind == IOP_RETVAL {
+			ircop(c, ic, ib, op.a);
+			fputs(c.cout, " = ");
+			break;
+		}
+
+		i = i + 1;
+	}
+}
+
+func ircargs(c: *compiler, ic: *irfunc, ib: *irblock, n: int) { // emit the n IOP_ARG operands of ib in index order, comma separated
+	var i: int;
+	var op: *irop;
+	var kind: int;
+	var args: **irop;
+
+	args = alloc(c.a, n * sizeof(*args)) as **irop;
+
+	i = 0;
+	loop {
+		if i == ib.ops_len {
+			break;
+		}
+
+		op = ib.ops[i];
+		if op.kind == IOP_ARG {
+			args[op.n] = op.a; // op.n is the argument position
+		}
+
+		i = i + 1;
+	}
+
+	i = 0;
+	loop {
+		if i == n {
+			break;
+		}
+
+		ircop(c, ic, ib, args[i]);
+
+		i = i + 1;
+
+		if i != n {
+			fputs(c.cout, ", ");
+		}
+	}
+
+	free(c.a, args as *byte);
+}