os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 50d022beb7584de8b42c5802c06b23dcb9c9363a
parent 3b99296b6fe39b4859c10e4ef588b1f29baa4352
Author: erai <erai@omiltem.net>
Date:   Sun,  2 Jun 2024 13:20:29 -0400

userspace pages and elf

Diffstat:
M as.c     |   4 ++--
M kernel.c | 610 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 580 insertions(+), 34 deletions(-)

diff --git a/as.c b/as.c
@@ -423,7 +423,7 @@ emit_kstart(c: *assembler) {
 
 	// pt3 -> 1g
 	as_modri(c, OP_SUBI, R_RSP, 0x1000);
-	as_modri(c, OP_MOVI, R_RAX, 0x87);
+	as_modri(c, OP_MOVI, R_RAX, 0x83);
 	as_modri(c, OP_MOVI, R_RDX, 0);
 	as_modrm(c, OP_STORE, R_RAX, R_RSP, 0, 0, 0);
 	as_modrm(c, OP_STORE, R_RDX, R_RSP, 0, 0, 4);
@@ -433,7 +433,7 @@ emit_kstart(c: *assembler) {
 	// pt4 -> pt3
 	as_modrr(c, OP_MOVE, R_RAX, R_RSP);
 	as_modri(c, OP_SUBI, R_RSP, 0x1000);
-	as_modri(c, OP_ORI, R_RAX, 7);
+	as_modri(c, OP_ORI, R_RAX, 3);
 	as_modri(c, OP_MOVI, R_RDX, 0);
 	as_modrm(c, OP_STORE, R_RAX, R_RSP, 0, 0, 0);
 	as_modrm(c, OP_STORE, R_RDX, R_RSP, 0, 0, 4);
diff --git a/kernel.c b/kernel.c
@@ -558,14 +558,14 @@ map_pci(pa: int): *byte {
 	global = g();
 	global.mmio = global.mmio - (1 << 31);
 	va = global.mmio;
-	pt4p = rdcr3();
+	pt4p = global.kpt;
 	pt4 = ptov(pt4p):*int;
 	v2 = ((va: int) >> 30) & 511;
 	pt3 = ptov(pt4[511] & -4096):*int;
-	flags = 0x97;
+	flags = 0x93;
 	pt3[v2] = (pa & -(1 << 30)) | flags;
 	pt3[v2 + 1] = ((pa + (1 << 30)) & -(1 << 30)) | flags;
-	wrcr3(pt4p);
+	invlpt();
 	return (va + (pa & ((1 << 30) - 1))):*byte;
 }
 
@@ -873,11 +873,11 @@ struct task {
 	name: *byte;
 	files: **vfile;
 	stack: *byte;
-	ustack: *byte;
 	dead: int;
 	f: (func(t: *task));
 	a: *void;
 	regs: regs;
+	pt: int;
 }
 
 struct global {
@@ -886,6 +886,7 @@ struct global {
 	vga: vga;
 	fr: *free_range;
 	fp: *free_page;
+	kpt: int;
 	lapicp: int;
 	lapic: *byte;
 	mmio: int;
@@ -1115,6 +1116,9 @@ direct_map(brk: *int) {
 	var map_size: int;
 	var i: int;
 	var n: int;
+	var global: *global;
+
+	global = g();
 
 	map_size = 1 << 46;
 	page_size = 1 << 30;
@@ -1122,7 +1126,7 @@ direct_map(brk: *int) {
 	va = -1 << 47;
 	pa = 0;
 
-	pt4p = rdcr3();
+	pt4p = global.kpt;
 	pt4 = ptov(pt4p):*int;
 
 	brk[0] = (brk[0] + 4095) & -4096;
@@ -1139,7 +1143,7 @@ direct_map(brk: *int) {
 		pt3p = (pt3:int) & ((1 << 31) - 1);
 
 		i = (va >> 39) & 511;
-		pt4[i] = pt3p | 0x007;
+		pt4[i] = pt3p | 0x003;
 
 		loop {
 			if pa == map_size || n == 0 {
@@ -1147,7 +1151,7 @@ direct_map(brk: *int) {
 			}
 
 			i = (va >> 30) & 511;
-			pt3[i] = pa | 0x087;
+			pt3[i] = pa | 0x083;
 
 			va = va + page_size;
 			pa = pa + page_size;
@@ -1155,7 +1159,32 @@ direct_map(brk: *int) {
 		}
 	}
 
-	wrcr3(pt4p);
+	invlpt();
+}
+
+invlpt() { // reload cr3; a task-private table first receives the kernel table's entries 256..511
+	var global: *global;
+	var t: *task;
+	var tpt: *int;
+	var kpt: *int;
+	var i: int;
+	global = g();
+	t = global.curtask;
+	if t.pt == global.kpt {
+		wrcr3(global.kpt);
+		return;
+	}
+	tpt = ptov(t.pt):*int;
+	kpt = ptov(global.kpt):*int;
+	i = 256;
+	loop {
+		if i == 512 {
+			break;
+		}
+		tpt[i] = kpt[i];
+		i = i + 1;
+	}
+	wrcr3(t.pt);
 }
 
 setup_ring(ring: int, own: int) {
@@ -3217,6 +3246,10 @@ tick(r: *regs) {
 	memcpy(r:*byte, (&next.regs):*byte, sizeof(*r));
 }
 
+freept(pt: int) { // frees only the page at pt itself; pages referenced from it are not walked
+	free(ptov(pt));
+}
+
 free_task(t: *task) {
 	var i: int;
 	i = 0;
@@ -3231,8 +3264,8 @@ free_task(t: *task) {
 		i = i + 1;
 	}
 	vclose(t.cwd);
+	freept(t.pt);
 	free(t.stack);
-	free(t.ustack);
 	free(t:*byte);
 }
 
@@ -3261,6 +3294,7 @@ schedule() {
 		free_task(dead);
 	}
 	global.curtask = next;
+	invlpt();
 }
 
 task_exit() {
@@ -3528,8 +3562,6 @@ spawn(f: (func(t: *task)), name: *byte, a: *void): *task {
 	bzero(t.files:*byte, 4096);
 	t.stack = alloc();
 	bzero(t.stack, 4096);
-	t.ustack = alloc();
-	bzero(t.ustack, 4096);
 	t.name = name;
 	t.regs.rsp = (t.stack:int) + 4096;
 	t.regs.rip = _tstart:int;
@@ -3537,6 +3569,8 @@ spawn(f: (func(t: *task)), name: *byte, a: *void): *task {
 	t.regs.ss = 16;
 	t.f = f;
 	t.a = a;
+	t.pt = alloc_page();
+	bzero(ptov(t.pt), 4096);
 	flags = rdflags();
 	cli();
 	cur = global.curtask;
@@ -3616,6 +3650,10 @@ strndup(s: *byte, n: int): *byte {
 	return r;
 }
 
+strdup(s: *byte): *byte { // strndup() over the whole string, length taken from strlen()
+	return strndup(s, strlen(s));
+}
+
 mkvnode(): *vnode {
 	var v: *vnode;
 	var global: *global;
@@ -3769,6 +3807,11 @@ vopen(name: *byte, flags: int, mode: int): *vfile {
 		if j == n {
 			break;
 		} else if name[j] == '/':byte {
+			if i == j { // empty path component (leading or repeated '/'): skip it
+				i = i + 1;
+				j = j + 1;
+				continue;
+			}
 			f = vlookup(d, &name[i], j - i, O_DIRECTORY, 0);
 			vclose(d);
 			d = f;
@@ -3863,6 +3906,22 @@ vclose(f: *vfile): int {
 	return vrelease(n);
 }
 
+vseek(f: *vfile, o: int, w: int): int { // w: 0 = absolute, 1 = from current offset, 2 = from file size; -1 for any other w
+	if w == 0 {
+		f.offset = o;
+	} else if w == 1 {
+		f.offset = f.offset + o;
+	} else if w == 2 {
+		f.offset = f.node.size + o;
+	} else {
+		return -1;
+	}
+	if f.offset < 0 { // a negative result is clamped to the start of the file
+		f.offset = 0;
+	}
+	return f.offset;
+}
+
 vwrite_page(v: *vnode, o: int, b: *byte, n: int): int {
 	var key: int;
 	var p: *vpage;
@@ -4066,8 +4125,8 @@ _ssr(r: *regs) {
 		kputs("read\n");
 		r.rax = -1;
 	} else if r.rax == 1 {
-		kputs("write\n");
-		r.rax = -1;
+		xxd(r.rsi:*byte, r.rdx); // NOTE(review): r.rsi and r.rdx are used without any validation here
+		r.rax = r.rdx;
 	} else if r.rax == 2 {
 		kputs("open\n");
 		r.rax = -1;
@@ -4105,7 +4164,9 @@ _ssr(r: *regs) {
 		kputs("exec\n");
 		r.rax = -1;
 	} else if r.rax == 60 {
-		kputs("exit\n");
+		kputs("exit(");
+		kputd(r.rdi);
+		kputs(")\n");
 		task_exit();
 	} else if r.rax == 61 {
 		kputs("wait\n");
@@ -4127,16 +4188,6 @@ _ssr(r: *regs) {
 	}
 }
 
-user() {
-}
-
-_ustart() {
-	user();
-	loop {
-		syscall(60, 0, 0, 0, 0, 0, 0);
-	}
-}
-
 initramfs(len: *int): *byte {
 	return _include("initramfs", len);
 }
@@ -4310,21 +4361,514 @@ userswitch(entry: int, stack: int) {
 	r.cs = 40 | 3;
 	r.rsp = stack;
 	r.ss = 32 | 3;
+	invlpt();
 	taskswitch(&discard, &r);
 }
 
-task_init(t: *task) {
+map_user(vaddr: int): *byte { // map one fresh zeroed page at vaddr in the current task; 0 if vaddr is unaligned, out of range or already mapped
+	var global: *global;
+	var task: *task;
+	var pt: *int;
+	var i: int;
+
+	if (vaddr >> 47) != 0 || (vaddr & 4095) != 0 {
+		return 0: *byte;
+	}
+
+	global = g();
+	task = global.curtask;
+
+	pt = ptov(task.pt):*int;
+
+	i = (vaddr >> 39) & 255;
+	if !pt[i] {
+		pt[i] = alloc_page() | 7;
+		bzero(ptov(pt[i] & -4096), 4096);
+	}
+	pt = ptov(pt[i] & -4096):*int;
+
+	i = (vaddr >> 30) & 511;
+	if !pt[i] {
+		pt[i] = alloc_page() | 7;
+		bzero(ptov(pt[i] & -4096), 4096);
+	}
+	pt = ptov(pt[i] & -4096):*int;
+
+	i = (vaddr >> 21) & 511;
+	if !pt[i] {
+		pt[i] = alloc_page() | 7;
+		bzero(ptov(pt[i] & -4096), 4096);
+	}
+	pt = ptov(pt[i] & -4096):*int;
+
+	i = (vaddr >> 12) & 511;
+	if !pt[i] {
+		pt[i] = alloc_page() | 7;
+		bzero(ptov(pt[i] & -4096), 4096);
+		return ptov(pt[i] & -4096);
+	}
+
+	return 0:*byte;
+}
+
+vload(f: *vfile, offset: int, vaddr: int, filesz: int, memsz: int): int { // load one segment at vaddr: filesz bytes from the file, zero pages up to memsz; 0 or -1
+	var t: *task;
+	var i: int;
+	var o: int;
+	var sz: int;
+	var m: *byte;
+
+	if filesz != 0 && (offset & 4095) != (vaddr & 4095) {
+		return -1;
+	}
+
+	if filesz > memsz {
+		return -1;
+	}
+
+	if offset < 0 || vaddr < 0 {
+		return -1;
+	}
+
+	if vaddr < 4096 {
+		return -1;
+	}
+
+	if filesz > 0 {
+		if vseek(f, offset, 0) != offset {
+			return -1;
+		}
+	}
+
+	i = 0;
+	loop {
+		if i >= memsz {
+			break;
+		}
+		o = (vaddr + i) & 4095; // byte offset inside the page; the mask must cover all 12 low bits
+		sz = 4096 - o;
+		m = map_user((vaddr + i) & -4096);
+		if !m {
+			return -1;
+		}
+		if i < filesz {
+			if sz > filesz - i {
+				if vread(f, &m[o], filesz - i) != filesz - i {
+					return -1;
+				}
+			} else {
+				if vread(f, &m[o], sz) != sz {
+					return -1;
+				}
+			}
+		}
+		i = i + sz;
+	}
+
+	return 0;
+}
+
+map_stack(argc: int, argv: **byte, envc: int, envv: **byte):int { // build argc/argv/envp on a page at 0x7fffe000, map 15 stack pages below; returns sp or 0
+	var m: *int;
+	var i: int;
+	var sp: int;
+	var n: int;
+	var len: int;
+
+	sp = 0x7fffe000;
+	m = map_user(sp): *int;
+
+	if !m || argc * 8 + envc * 8 + 24 > 4096 { // the pointer table itself must fit in the page
+		return 0;
+	}
+
+	m[0] = argc;
+	m[argc + 1] = 0; // NULL after argv[0..argc-1], which occupy m[1..argc]
+	m[argc + envc + 2] = 0; // NULL after envp[0..envc-1], which occupy m[argc+2..argc+envc+1]
+
+	n = argc * 8 + envc * 8 + 24;
+
+	// copy args
+	i = 0;
+	loop {
+		if i == argc {
+			break;
+		}
+
+		len = strlen(argv[i]);
+		if len >= 4096 - n {
+			return 0;
+		}
+
+		memcpy(&(m:*byte)[n], argv[i], len + 1);
+
+		m[i + 1] = sp + n;
+
+		i = i + 1;
+		n = n + len + 1;
+	}
+
+	// copy env
+	i = 0;
+	loop {
+		if i == envc {
+			break;
+		}
+
+		len = strlen(envv[i]);
+		if len >= 4096 - n {
+			return 0;
+		}
+
+		memcpy(&(m:*byte)[n], envv[i], len + 1);
+
+		m[argc + i + 2] = sp + n; // envp starts one slot past the argv NULL terminator
+
+		i = i + 1;
+		n = n + len + 1;
+	}
+
+	// Allocate a user stack
+	i = 1;
+	loop {
+		if i == 16 {
+			break;
+		}
+		if !map_user(0x7fffe000 - 4096 * i) {
+			return 0;
+		}
+		i = i + 1;
+	}
+
+	return sp;
+}
+
+vexec(prog: *byte, argv: **byte, envp: **byte): int { // load prog into a new page table and enter it; returns (-1) only on failure
 	var f: *vfile;
-	var buf: *byte;
+	var head: *byte;
+	var args: **byte;
+	var envs: **byte;
+	var nargs: int;
+	var nenv: int;
 	var n: int;
-	f = vopen("init", O_RDONLY, 0);
-	if !f {
-		kdie("no init");
+	var i: int;
+	var entry: int;
+	var phoff: int;
+	var phnum: int;
+	var size: int;
+	var p_type: int;
+	var p_offset: int;
+	var p_vaddr: int;
+	var p_filesz: int;
+	var p_memsz: int;
+	var pt: int;
+	var global: *global;
+	var t: *task;
+	var stack: int;
+
+	global = g();
+	t = global.curtask;
+
+	pt = t.pt;
+	t.pt = alloc_page();
+	bzero(ptov(t.pt), 4096);
+
+	head = alloc();
+	args = alloc():**byte;
+	envs = alloc():**byte;
+	nargs = 0;
+	nenv = 0;
+	f = 0:*vfile;
+
+	// Copy args
+	if argv {
+		loop {
+			if nargs == 512 {
+				goto fail;
+			}
+			if !argv[nargs] {
+				break;
+			}
+			args[nargs] = strdup(argv[nargs]);
+			nargs = nargs + 1;
+		}
 	}
-	buf = alloc();
-	n = vread(f, buf, 4096);
-	xxd(buf, n);
+
+	// Copy environment
+	if envp {
+		loop {
+			if nenv == 512 {
+				goto fail;
+			}
+			if !envp[nenv] {
+				break;
+			}
+			envs[nenv] = strdup(envp[nenv]);
+			nenv = nenv + 1;
+		}
+	}
+
+	// Find interpreter
+	loop {
+		f = vopen(prog, O_RDONLY, 0);
+		if !f {
+			goto fail;
+		}
+
+		n = vread(f, head, 4096);
+		if n >= 2 && head[0] == '#':byte && head[1] == '!':byte {
+			i = 2; // scan for the end of the #! line first, so a failure leaves args untouched
+			loop {
+				if i == n || nargs == 512 { // no newline, or no free slot left in the args page
+					goto fail;
+				}
+				if head[i] == '\n':byte {
+					break;
+				}
+				i = i + 1;
+			}
+
+			nargs = nargs + 1;
+			n = nargs; // n is reused as the shift index; it is reloaded by vread after continue
+			loop {
+				if n == 1 { // stop at slot 1 so args[-1] is never read
+					break;
+				}
+				n = n - 1;
+				args[n] = args[n - 1];
+			}
+			args[0] = strdup(prog); // copy prog before the free below: on a nested #! prog is the string in args[1]
+			if nargs > 1 {
+				free(args[1]);
+			} else {
+				nargs = 2;
+			}
+			args[1] = args[0];
+			args[0] = strndup(&head[2], i - 2);
+			prog = args[0];
+			vclose(f);
+			f = 0: *vfile;
+			continue;
+		}
+
+		break;
+	}
+
+	size = f.node.size;
+
+	// Load elf
+	if n < 0x40 {
+		goto fail;
+	}
+
+	// magic
+	if !(head[0]:int == 0x7f && head[1]:int == 0x45 && head[2]:int == 0x4c && head[3]:int == 0x46) {
+		goto fail;
+	}
+
+	// 64 bit
+	if head[4]:int != 2 {
+		goto fail;
+	}
+
+	// little endian
+	if head[5]:int != 1 {
+		goto fail;
+	}
+
+	// version
+	if head[6]:int != 1 {
+		goto fail;
+	}
+
+	// executable
+	if head[17]:int != 0 || head[16]:int != 2 {
+		goto fail;
+	}
+
+	// machine
+	if head[19]:int != 0 || head[18]:int != 0x3e {
+		goto fail;
+	}
+
+	// version
+	if !(head[23]:int == 0 && head[22]:int == 0 && head[21]:int == 0 && head[20]:int == 1) {
+		goto fail;
+	}
+
+	// ehsize
+	if !(head[0x35]:int == 0 && head[0x34]:int == 0x40) {
+		goto fail;
+	}
+
+	// phentsize
+	if !(head[0x37]:int == 0 && head[0x36]:int == 0x38) {
+		goto fail;
+	}
+
+	// entry point
+	entry = head[24]:int
+		| (head[25]:int << 8)
+		| (head[26]:int << 16)
+		| (head[27]:int << 24)
+		| (head[28]:int << 32)
+		| (head[29]:int << 40)
+		| (head[30]:int << 48)
+		| (head[31]:int << 56);
+	if entry < 0 {
+		goto fail;
+	}
+
+	phoff = head[32]:int
+		| (head[33]:int << 8)
+		| (head[34]:int << 16)
+		| (head[35]:int << 24)
+		| (head[36]:int << 32)
+		| (head[37]:int << 40)
+		| (head[38]:int << 48)
+		| (head[39]:int << 56);
+
+	phnum = head[0x38]:int
+		| (head[0x39]:int << 8);
+	if phnum > 64 {
+		goto fail;
+	}
+
+	if phoff > size || 56 * phnum > size - phoff {
+		goto fail;
+	}
+
+	if vseek(f, phoff, 0) != phoff {
+		goto fail;
+	}
+
+	n = vread(f, head, 56 * phnum);
+	if n != 56 * phnum {
+		goto fail;
+	}
+
+	i = 0;
+	loop {
+		if i == phnum {
+			break;
+		}
+
+		p_type = head[i * 56 + 0]:int
+			| (head[i * 56 + 1]:int << 8)
+			| (head[i * 56 + 2]:int << 16)
+			| (head[i * 56 + 3]:int << 24);
+		p_offset = head[i * 56 + 8]:int
+			| (head[i * 56 + 9]:int << 8)
+			| (head[i * 56 + 10]:int << 16)
+			| (head[i * 56 + 11]:int << 24)
+			| (head[i * 56 + 12]:int << 32)
+			| (head[i * 56 + 13]:int << 40)
+			| (head[i * 56 + 14]:int << 48)
+			| (head[i * 56 + 15]:int << 56);
+		p_vaddr = head[i * 56 + 16]:int
+			| (head[i * 56 + 17]:int << 8)
+			| (head[i * 56 + 18]:int << 16)
+			| (head[i * 56 + 19]:int << 24)
+			| (head[i * 56 + 20]:int << 32)
+			| (head[i * 56 + 21]:int << 40)
+			| (head[i * 56 + 22]:int << 48)
+			| (head[i * 56 + 23]:int << 56);
+		p_filesz = head[i * 56 + 32]:int
+			| (head[i * 56 + 33]:int << 8)
+			| (head[i * 56 + 34]:int << 16)
+			| (head[i * 56 + 35]:int << 24)
+			| (head[i * 56 + 36]:int << 32)
+			| (head[i * 56 + 37]:int << 40)
+			| (head[i * 56 + 38]:int << 48)
+			| (head[i * 56 + 39]:int << 56);
+		p_memsz = head[i * 56 + 40]:int
+			| (head[i * 56 + 41]:int << 8)
+			| (head[i * 56 + 42]:int << 16)
+			| (head[i * 56 + 43]:int << 24)
+			| (head[i * 56 + 44]:int << 32)
+			| (head[i * 56 + 45]:int << 40)
+			| (head[i * 56 + 46]:int << 48)
+			| (head[i * 56 + 47]:int << 56);
+
+		if p_type == 1 {
+			if vload(f, p_offset, p_vaddr, p_filesz, p_memsz) != 0 {
+				goto fail;
+			}
+		}
+
+		i = i + 1;
+	}
+
+	// allocate a stack
+	stack = map_stack(nargs, args, nenv, envs);
+	if !stack {
+		goto fail;
+	}
+	vclose(f);
+	i = 0;
+	loop {
+		if i == nargs {
+			break;
+		}
+		if args[i] {
+			free(args[i]);
+		}
+		i = i + 1;
+	}
+	i = 0;
+	loop {
+		if i == nenv {
+			break;
+		}
+		if envs[i] {
+			free(envs[i]);
+		}
+		i = i + 1;
+	}
+	free(head);
+	free(args:*byte);
+	free(envs:*byte);
+	freept(pt);
+	userswitch(entry, stack);
+	kdie("unreachable");
+
+:fail;
+	if f {
+		vclose(f);
+	}
+	i = 0;
+	loop {
+		if i == nargs {
+			break;
+		}
+		if args[i] {
+			free(args[i]);
+		}
+		i = i + 1;
+	}
+	i = 0;
+	loop {
+		if i == nenv {
+			break;
+		}
+		if envs[i] {
+			free(envs[i]);
+		}
+		i = i + 1;
+	}
+	free(head);
+	free(args:*byte);
+	free(envs:*byte);
+	freept(t.pt);
+	t.pt = pt; // restore the page table that was current before the exec attempt
+	return -1;
+}
+
+task_init(t: *task) {
+	if vexec("/init", 0:**byte, 0:**byte) != 0 {
+		kdie("failed to exec init");
+	}
 }
 
 _kstart(mb: int) {
@@ -4352,6 +4896,7 @@ _kstart(mb: int) {
 	task.next = &task;
 	task.prev = &task;
 	task.name = "_kstart";
+	task.pt = rdcr3();
 
 	bzero((&global):*byte, sizeof(global));
 	global.ptr = &global;
@@ -4373,6 +4918,7 @@ _kstart(mb: int) {
 
 	global.fr = 0:*free_range;
 	global.fp = 0:*free_page;
+	global.kpt = rdcr3();
 
 	mbinfo = ptov(mb);
 	mmap = ptov(_r32(&mbinfo[48])): *int;