// SPDX-FileCopyrightText: 2024 Himbeer
//
// SPDX-License-Identifier: AGPL-3.0-or-later

// This is an implementation of Sv39 paging, meaning that the virtual addresses
// are 39 bits wide. Sv32 and Sv48 are currently not implemented.

const std = @import("std");

const hwinfo = @import("hwinfo.zig");
const riscv = @import("riscv.zig");

// Defined by linker script.
pub const text_start = @extern(*anyopaque, .{ .name = "_text_start" });
pub const text_end = @extern(*anyopaque, .{ .name = "_text_end" });
pub const rodata_start = @extern(*anyopaque, .{ .name = "_rodata_start" });
pub const rodata_end = @extern(*anyopaque, .{ .name = "_rodata_end" });
pub const userinit_start = @extern(*anyopaque, .{ .name = "_userinit_start" });
pub const userinit_end = @extern(*anyopaque, .{ .name = "_userinit_end" });
pub const data_start = @extern(*anyopaque, .{ .name = "_data_start" });
pub const data_end = @extern(*anyopaque, .{ .name = "_data_end" });
pub const bss_start = @extern(*anyopaque, .{ .name = "_bss_start" });
pub const bss_end = @extern(*anyopaque, .{ .name = "_bss_end" });
pub const stack_start = @extern(*anyopaque, .{ .name = "_stack_start" });
pub const stack_end = @extern(*anyopaque, .{ .name = "_stack_end" });
pub const stvec_stack_start = @extern(*anyopaque, .{ .name = "_stvec_stack_start" });
pub const stvec_stack_end = @extern(*anyopaque, .{ .name = "_stvec_stack_end" });
pub const heap_start = @extern(*anyopaque, .{ .name = "_heap_start" });
pub const heap_end = @extern(*anyopaque, .{ .name = "_heap_end" });

inline fn heapSize() usize {
    return @intFromPtr(heap_end) - @intFromPtr(heap_start);
}

pub const page_size: usize = 0x1000; // 4096 bytes
pub const log2_page_size: u8 = @intCast(std.math.log2(page_size));

pub var next_mmio_vaddr: usize = 0xff000000;

pub var kmem: *Table = undefined;

pub const Error = error{
    ZeroSize,
    OutOfMemory,
    AlreadyTaken,
    NotALeaf,
};

pub const Mode = enum(u4) {
    bare,
    sv39 = 8,
    sv48,
};

// SATP register; configures and enables the MMU (and thus paging).
pub const Satp = packed struct(usize) {
    pub const Asid = u16;

    // Reverse field order so that @bitCast yields a usize with the right order.
    // Without this, writing the value to the SATP register enables an invalid page table,
    // leaves the MMU disabled or causes other bugs.
    ppn: u44,
    asid: Asid,
    mode: Mode,
};

// A page descriptor for use by the heap allocator.
pub const Page = struct {
    flags: Flags,

    pub const Flags = packed struct {
        active: u1,
        last: u1, // Last page of contiguous allocation

        pub const clear = .{
            .active = 0,
            .last = 0,
        };
    };

    // Marks a page as taken, optionally flagging it as the last page of an allocation.
    // Returns Error.AlreadyTaken if the page is already in use.
    pub fn take(self: *Page, last: bool) !void {
        if (@bitCast(self.flags.active)) return Error.AlreadyTaken;

        self.flags.active = 1;
        if (last) self.flags.last = 1;
    }
};

// Returns the offset from the page base. Works with both physical and virtual addresses.
// Offsets are never translated.
fn offsetOf(addr: usize) usize {
    // Offset is in bottom 12 bits of both physical and virtual addresses.
    return addr & 0xfff;
}

// Returns the virtual page numbers of a virtual address by paging level.
fn virtualPageNumbers(vaddr: usize) [3]usize {
    // Virtual address format:
    //
    // VPN[2] | VPN[1] | VPN[0] | offset
    // 9 bits | 9 bits | 9 bits | 12 bits
    //
    // Virtual page numbers are indexes into the page table of their level,
    // i.e. VPN[2] is an index to the root page table on level 2,
    // whereas VPN[1] is an index to the page table on level 1 specified by VPN[2].
    //
    // Offsets are never translated.

    return [3]usize{
        (vaddr >> 12) & 0x1ff,
        (vaddr >> 21) & 0x1ff,
        (vaddr >> 30) & 0x1ff,
    };
}
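
// Example: the Sv39 split of the (arbitrary) virtual address 0x8020_1000 is
// offset = 0x000, VPN[0] = 1, VPN[1] = 1 and VPN[2] = 2.
test "virtualPageNumbers splits an address by paging level" {
    const vpn = virtualPageNumbers(0x8020_1000);

    try std.testing.expectEqual(@as(usize, 1), vpn[0]);
    try std.testing.expectEqual(@as(usize, 1), vpn[1]);
    try std.testing.expectEqual(@as(usize, 2), vpn[2]);
}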
// Returns the physical page numbers of a physical address by paging level.
fn physicalPageNumbers(paddr: usize) [3]usize {
    // Physical address format:
    //
    // PPN[2]  | PPN[1] | PPN[0] | offset
    // 26 bits | 9 bits | 9 bits | 12 bits
    //
    // PPN[i] is what to map VPN[i] to.
    //
    // Offsets are never translated.

    return [3]usize{
        (paddr >> 12) & 0x1ff,
        (paddr >> 21) & 0x1ff,
        (paddr >> 30) & 0x3ff_ffff,
    };
}

// Returns the page numbers of an address as a single integer.
fn pageNumber(addr: usize) u44 {
    return @intCast(addr >> 12);
}

pub const EntryFlags = packed struct(u8) {
    valid: u1,
    read: u1,
    write: u1,
    exec: u1,
    user: u1,
    global: u1,
    accessed: u1,
    dirty: u1,

    pub const branch = EntryFlags{
        .valid = 1,
        .read = 0,
        .write = 0,
        .exec = 0,
        .user = 0,
        .global = 0,
        .accessed = 0,
        .dirty = 0,
    };

    pub const readOnly = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 0,
        .exec = 0,
        .user = 0,
        .global = 0,
        .accessed = 1,
        .dirty = 0,
    };

    pub const readWrite = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 1,
        .exec = 0,
        .user = 0,
        .global = 0,
        .accessed = 1,
        .dirty = 1,
    };

    pub const readExec = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 0,
        .exec = 1,
        .user = 0,
        .global = 0,
        .accessed = 1,
        .dirty = 0,
    };

    pub const userReadOnly = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 0,
        .exec = 0,
        .user = 1,
        .global = 0,
        .accessed = 1,
        .dirty = 0,
    };

    pub const userReadWrite = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 1,
        .exec = 0,
        .user = 1,
        .global = 0,
        .accessed = 1,
        .dirty = 1,
    };

    pub const userReadExec = EntryFlags{
        .valid = 1,
        .read = 1,
        .write = 0,
        .exec = 1,
        .user = 1,
        .global = 0,
        .accessed = 1,
        .dirty = 0,
    };

    pub fn isLeaf(self: EntryFlags) bool {
        return @bitCast(self.read) or @bitCast(self.write) or @bitCast(self.exec);
    }

    // Returns whether the entry permissions allow the accesses
    // specified in the `requested` argument.
    pub fn allowAccess(self: EntryFlags, requested: EntryFlags) bool {
        if (self.user != requested.user) return false;
        if (self.read < requested.read) return false;
        if (self.write < requested.write) return false;
        if (self.exec < requested.exec) return false;

        return true;
    }
};

pub const Entry = packed struct(u64) {
    // Reverse field order so that @bitCast yields a u64 with the right order.
    // Without this, writing the value to a page table creates an invalid entry,
    // resulting in page faults or hangs.
    flags: EntryFlags,
    rsw: u2, // Reserved for supervisor use. Currently unused.
    mapping: u44,
    reserved: u10,

    // Returns the physical page numbers to map to by paging level.
    pub fn physicalPageNumbers(self: Entry) [3]usize {
        // Mapping format:
        //
        // PPN[2]  | PPN[1] | PPN[0]
        // 26 bits | 9 bits | 9 bits
        //
        // PPN[i] is what to map VPN[i] to.

        return [3]usize{
            self.mapping & 0x1ff,
            (self.mapping >> 9) & 0x1ff,
            (self.mapping >> 18) & 0x3ff_ffff,
        };
    }

    pub fn mappingAddr(self: Entry) usize {
        // Apply an offset of zero since entries always point to an aligned page
        // and this function should return a usable memory address.
        // Callers can change the offset if needed.
        //
        // Widen to usize before shifting so that no PPN bits are shifted out of the u44.
        return @as(usize, self.mapping) << 12;
    }

    pub fn isValid(self: Entry) bool {
        return @bitCast(self.flags.valid);
    }

    // Returns whether the entry is a mapping (true) or another page table (false).
    pub fn isLeaf(self: Entry) bool {
        return self.flags.isLeaf();
    }
};
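
// Example: an entry built from an (arbitrary) aligned physical address
// round-trips through pageNumber and mappingAddr without losing information,
// since the PPN is simply the address with its 12-bit offset stripped.
test "Entry round-trips an aligned physical address" {
    const entry = Entry{
        .flags = EntryFlags.readWrite,
        .rsw = 0,
        .mapping = pageNumber(0x8020_0000),
        .reserved = 0,
    };

    try std.testing.expect(entry.isValid());
    try std.testing.expect(entry.isLeaf());
    try std.testing.expectEqual(@as(usize, 0x8020_0000), entry.mappingAddr());
}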
pub const Table = struct {
    // Do not add any fields. The unmap function relies on mappings pointing to page tables,
    // casting them to this data structure. This cast becomes invalid if additional fields
    // are added, especially if they precede the entries field.

    entries: [512]Entry,

    // Create a mapping of a certain virtual page address to a physical page address,
    // discarding offsets. The mapping is written to the specified level,
    // creating page tables as needed. Level 0 creates a 4 KiB mapping,
    // level 1 a 2 MiB one and level 2 a 1 GiB one.
    //
    // The mapping must be a leaf, meaning that passing flags
    // that indicate no access permissions at all will return an error.
    //
    // This function internally uses zeroedAlloc to allocate memory for the required page tables,
    // but assumes that the physical address to map to has already been allocated by the caller.
    pub fn map(root: *Table, vaddr: usize, paddr: usize, flags: EntryFlags, level: usize) !void {
        if (!flags.isLeaf()) return Error.NotALeaf;

        const vpn = virtualPageNumbers(vaddr);

        // Grab the entry in the root (level 2) page table.
        var v = &root.entries[vpn[2]];

        // Walk the page table levels from high to low under the assumption that root is valid,
        // descending until the requested level is reached (i goes from 1 down to `level`).
        for (level..2) |iInv| {
            const i = 1 + level - iInv;

            // If this entry doesn't point to a lower-level page table or memory page yet,
            // allocate one.
            if (!v.isValid()) {
                const page = try zeroedAlloc(1);
                v.* = .{
                    .flags = EntryFlags.branch,
                    .rsw = 0,
                    .mapping = pageNumber(@intFromPtr(page.ptr)), // Remove the offset, a mapping is just the PPN.
                    .reserved = 0,
                };
            }

            // Get the entries of the existing or newly created page table.
            // This cast is safe because the only field of a Table is its entries.
            const table: *Table = @ptrFromInt(v.mappingAddr());
            // Grab the entry of the table by indexing it according to the corresponding VPN.
            v = &table.entries[vpn[i]];
        }

        // Write the actual mapping to the correct table on the requested level.
        v.* = .{
            .flags = flags,
            .rsw = 0,
            .mapping = @intCast(pageNumber(paddr)), // Remove the offset, a mapping is just the PPN.
            .reserved = 0,
        };
    }

    // Deallocate child page tables recursively. The provided table itself is not affected,
    // allowing partial unmapping of multi-level tables.
    //
    // This function does not deallocate memory pages mapped by the provided table
    // or any of its (recursive) children.
    pub fn unmap(table: *Table) void {
        for (&table.entries) |*entry| {
            if (entry.isValid() and !entry.isLeaf()) {
                // This cast is safe because the only field of a Table is its entries.
                const lowerLevelTable: *Table = @ptrFromInt(entry.mappingAddr());
                lowerLevelTable.unmap();
                entry.flags.valid = 0;
                free(lowerLevelTable);
            }
        }
    }

    // Invalidates the leaf entry for a single virtual address, if one exists.
    pub fn unmapEntry(root: *Table, vaddr: usize) void {
        const vpn = virtualPageNumbers(vaddr);

        var v = &root.entries[vpn[2]];

        for (0..3) |iInv| {
            const i = 2 - iInv;

            if (!v.isValid()) {
                break;
            } else if (v.isLeaf()) {
                v.flags.valid = 0;
                // IMPORTANT: Flush TLB correctly
                // if sfence.vma in process.switchTo is ever optimized.
                return;
            }

            const entry: *[512]Entry = @ptrFromInt(v.mappingAddr());
            v = &entry[vpn[i - 1]];
        }
    }
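
    // Example (unused sketch): mapping and unmapping a single 4 KiB page at
    // level 0. The addresses are arbitrary placeholders; after modifying a
    // page table that is live in the MMU, the TLB must also be flushed.
    fn mapSinglePageExample(root: *Table) !void {
        const vaddr: usize = 0xff80_0000;
        const paddr: usize = 0x1000_0000;

        try root.map(vaddr, paddr, EntryFlags.readWrite, 0);
        asm volatile ("sfence.vma" ::: "memory");

        // ... access the page through vaddr ...

        root.unmapEntry(vaddr);
        asm volatile ("sfence.vma" ::: "memory");
    }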
    // Returns the physical address for a virtual address using the provided level 2 page table.
    // This can be used to access virtual addresses whose page table isn't active
    // in the MMU / SATP CSR (Control and Status Register), making it possible
    // to cleanly access the memory space of a user mode process (from its perspective)
    // from supervisor or machine mode.
    //
    // If the permissions requested using the `flags` argument exceed those
    // found in the page table entry, no value is returned.
    //
    // The absence of a return value is equivalent to a page fault.
    pub fn translate(root: *const Table, vaddr: usize, flags: EntryFlags) ?usize {
        const vpn = virtualPageNumbers(vaddr);

        // Grab the entry in the root (level 2) page table.
        var v = &root.entries[vpn[2]];

        // Walk the page table levels from high to low.
        for (0..3) |iInv| {
            const i = 2 - iInv;

            if (!v.isValid()) {
                break;
            } else if (v.isLeaf()) {
                // Mapping found.

                if (!v.flags.allowAccess(flags)) return null;

                // Create a mask starting directly below / after PN[i].
                // Since all levels can have leaves, i is not guaranteed to be zero.
                const offset_mask = (@as(usize, 1) << @intCast(12 + 9 * i)) - 1;
                const offset = vaddr & offset_mask;
                const ppn_joined = v.mappingAddr() & ~offset_mask;

                return ppn_joined | offset;
            }

            // Get the entries of the page table of the current level.
            const entry: *[512]Entry = @ptrFromInt(v.mappingAddr());
            // Grab the entry of the table by indexing it according to the corresponding VPN.
            v = &entry[vpn[i - 1]];
        }

        return null;
    }

    // Creates an identity mapping for all pages needed for the specified range
    // using the map function. An identity mapping doesn't actually translate
    // memory addresses; virtual and physical addresses are the same.
    //
    // The start address is inclusive while end is exclusive.
    //
    // This is still useful because it can be used to prevent the kernel
    // from accessing machine-reserved memory by accident.
    pub fn identityMapRange(root: *Table, start: usize, end: usize, flags: EntryFlags) !void {
        // Mask out the offset within the starting page.
        const start_page = start & ~(page_size - 1);

        // Mask out the offset within the ending page, but ensure the returned page address
        // is always the last required page for the mapping (end is exclusive,
        // so subtracting 1 ends up in the previous page on boundaries,
        // eliminating one useless mapping). The resulting value is inclusive.
        const end_page = (end - 1) & ~(page_size - 1);

        var page = start_page;
        while (page <= end_page) : (page += page_size) {
            try root.map(page, page, flags, 0);
        }
    }
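
    // Example (unused sketch): resolving a pointer from a foreign address space,
    // e.g. a user process buffer, from supervisor mode. This relies on the kernel
    // being able to reach the returned physical address directly, which holds
    // for identity-mapped heap pages (see mapKernel and zeroedAlloc).
    fn loadUserByteExample(user_root: *const Table, user_vaddr: usize) ?u8 {
        const paddr = user_root.translate(user_vaddr, EntryFlags.userReadOnly) orelse return null;
        const byte: *const u8 = @ptrFromInt(paddr);
        return byte.*;
    }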
    // Constructs the SATP register value needed to activate the specified page table
    // using the provided Address Space Identifier (ASID).
    //
    // The kernel page table always has ASID 0 (not mandated by the RISC-V specification).
    pub fn satp(root: *const Table, asid: Satp.Asid) Satp {
        return .{
            .ppn = @intCast(pageNumber(@intFromPtr(root))),
            .asid = asid,
            .mode = .sv39,
        };
    }

    pub fn mapKernel(root: *Table) !void {
        try root.identityMapRange(@intFromPtr(text_start), @intFromPtr(text_end), EntryFlags.readExec);
        try root.identityMapRange(@intFromPtr(rodata_start), @intFromPtr(rodata_end), EntryFlags.readOnly);
        try root.identityMapRange(@intFromPtr(userinit_start), @intFromPtr(userinit_end), EntryFlags.readOnly);
        try root.identityMapRange(@intFromPtr(data_start), @intFromPtr(data_end), EntryFlags.readWrite);
        try root.identityMapRange(@intFromPtr(bss_start), @intFromPtr(bss_end), EntryFlags.readWrite);
        try root.identityMapRange(@intFromPtr(stack_start), @intFromPtr(stack_end), EntryFlags.readWrite);
        try root.identityMapRange(@intFromPtr(stvec_stack_start), @intFromPtr(stvec_stack_end), EntryFlags.readWrite);
        try root.identityMapRange(@intFromPtr(heap_start), @intFromPtr(heap_end), EntryFlags.readWrite);
    }

    pub fn mapDevice(root: *Table, reg: *hwinfo.Reg) !void {
        const physical_start = reg.addr & ~(page_size - 1);
        const physical_end = (reg.addr + reg.len - 1) & ~(page_size - 1);

        reg.addr = next_mmio_vaddr | (reg.addr & (page_size - 1));

        var paddr = physical_start;
        while (paddr <= physical_end) : (paddr += page_size) {
            try root.map(next_mmio_vaddr, paddr, EntryFlags.readWrite, 0);
            next_mmio_vaddr += page_size;
        }
    }
};

pub fn init() !void {
    const num_pages = heapSize() / page_size;
    const pages: [*]Page = @ptrCast(heap_start);

    for (0..num_pages) |i| {
        pages[i].flags = Page.Flags.clear;
    }

    kmem = @ptrCast(try zeroedAlloc(1));
    try kmem.mapKernel();
}

// Allocate memory pages. Passing n = 0 results in an error.
pub fn alloc(n: usize) ![]align(page_size) u8 {
    if (n == 0) return Error.ZeroSize;

    const num_pages = heapSize() / page_size;

    // Start allocating beyond page descriptors.
    const pages = @intFromPtr(heap_start) + num_pages * @sizeOf(Page);
    const alloc_start = std.mem.alignForwardLog2(pages, log2_page_size);

    // Only the pages between the descriptor table and the end of the heap are allocatable.
    const usable_pages = (@intFromPtr(heap_end) - alloc_start) / page_size;
    if (n > usable_pages) return Error.OutOfMemory;

    const descriptors: [*]Page = @ptrCast(heap_start);

    // Iterate over potential starting points.
    // The upper bound excludes starting points that don't leave enough
    // contiguous space for the whole allocation.
    for (0..usable_pages - n + 1) |i| {
        if (!@bitCast(descriptors[i].flags.active)) {
            // Free starting page found.

            var insufficient = false;

            // Check if there is enough contiguous free space for the whole allocation.
            // If not, move on to the next potential starting point.
            for (i..n + i) |j| {
                if (@bitCast(descriptors[j].flags.active)) {
                    insufficient = true;
                    break;
                }
            }

            if (!insufficient) {
                // Mark all allocated pages as taken.
                for (i..n + i - 1) |j| {
                    try descriptors[j].take(false);
                }
                try descriptors[n + i - 1].take(true);

                // Construct a pointer to the first page using its descriptor number.
                const first = alloc_start + i * page_size;
                const allocation: [*]align(page_size) u8 = @ptrFromInt(first);
                return allocation[0 .. n * page_size];
            }
        }
    }

    return Error.OutOfMemory;
}
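
// Example (unused sketch): the allocator hands out whole, page-aligned pages.
// A single page can back any page-sized object and is later released
// through its page descriptor by free.
fn pageAllocExample() !void {
    const scratch: *[page_size]u8 = @ptrCast(try alloc(1));
    defer free(scratch);

    scratch[0] = 0xaa;
}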
// Free (contiguous) memory page(s).
pub fn free(memory: anytype) void {
    const Slice = @typeInfo(@TypeOf(memory)).Pointer;
    const bytes = std.mem.asBytes(memory);
    const bytes_len = bytes.len + if (Slice.sentinel != null) @sizeOf(Slice.child) else 0;
    if (bytes_len == 0) return;

    const num_pages = heapSize() / page_size;

    // Allocations start beyond the page descriptors.
    const pages = @intFromPtr(heap_start) + num_pages * @sizeOf(Page);
    const alloc_start = std.mem.alignForwardLog2(pages, log2_page_size);

    // Restore the address of the page descriptor flags from the address of the contents
    // by recovering the descriptor number and indexing the descriptor table
    // at the start of the heap with it.
    const descriptor_offset = (@intFromPtr(bytes.ptr) - alloc_start) / page_size;
    const addr = @intFromPtr(heap_start) + descriptor_offset * @sizeOf(Page);

    var page: [*]Page = @ptrFromInt(addr);

    // Mark all but the last page of the allocation as free,
    // stopping early if a page is already free (double free).
    while (@bitCast(page[0].flags.active) and !@bitCast(page[0].flags.last)) : (page += 1) {
        page[0].flags = Page.Flags.clear;
    }

    // Mark the last page as free. Clearing an already-free page is harmless.
    page[0].flags = Page.Flags.clear;
}

// Allocate memory pages and overwrite their contents with zeroes for added security.
// Passing n = 0 results in an error.
pub fn zeroedAlloc(n: usize) Error![]align(page_size) u8 {
    const ret = try alloc(n);

    const satp = riscv.satp.read();
    if (satp.mode != .bare) {
        // Widen the PPN to usize before shifting so that no bits are lost.
        const page_table: *Table = @ptrFromInt(@as(usize, satp.ppn) << 12);

        const start = @intFromPtr(ret.ptr);
        const end = start + ret.len;

        try page_table.identityMapRange(start, end, EntryFlags.readWrite);
    }

    // Write zeroes in 64-bit batches to reduce the number of store instructions.
    // The remaining bytes don't need to be accounted for
    // because page_size (4096) is divisible by 8.
    const len = (n * page_size) / 8;
    const ptr: []volatile u64 = @as([*]volatile u64, @ptrCast(ret))[0..len];

    for (0..len) |i| {
        ptr[i] = 0;
    }

    return ret;
}

pub fn setUserMemoryAccess(enable: bool) void {
    var sstatus = riscv.sstatus.read();
    sstatus.supervisor_user_memory_access = @bitCast(enable);
    riscv.sstatus.write(sstatus);
}
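
// Example (unused sketch): typical paging bring-up. init prepares the page
// descriptors and the kernel table; writing its SATP value then enables Sv39.
// `riscv.satp.write` is assumed to exist as the counterpart of the
// `riscv.satp.read` call in zeroedAlloc.
fn enablePagingExample() !void {
    try init();

    riscv.satp.write(kmem.satp(0));
    // Discard stale translations after switching the address space.
    asm volatile ("sfence.vma" ::: "memory");
}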