Diffstat (limited to 'src/lib/paging.zig')
-rw-r--r--  src/lib/paging.zig  557
1 file changed, 557 insertions, 0 deletions
diff --git a/src/lib/paging.zig b/src/lib/paging.zig
new file mode 100644
index 0000000..1e785e7
--- /dev/null
+++ b/src/lib/paging.zig
@@ -0,0 +1,557 @@
+// SPDX-FileCopyrightText: 2024 Himbeer <himbeer@disroot.org>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// This is an implementation of Sv39 paging, meaning that the virtual addresses
+// are 39 bits wide. Sv32 and Sv48 are currently not implemented.
+
+const hwinfo = @import("hwinfo.zig");
+
+// Defined by linker script.
+pub const text_start = @extern(*anyopaque, .{ .name = "_text_start" });
+pub const text_end = @extern(*anyopaque, .{ .name = "_text_end" });
+pub const rodata_start = @extern(*anyopaque, .{ .name = "_rodata_start" });
+pub const rodata_end = @extern(*anyopaque, .{ .name = "_rodata_end" });
+pub const data_start = @extern(*anyopaque, .{ .name = "_data_start" });
+pub const data_end = @extern(*anyopaque, .{ .name = "_data_end" });
+pub const sdata_start = @extern(*anyopaque, .{ .name = "_sdata_start" });
+pub const sdata_end = @extern(*anyopaque, .{ .name = "_sdata_end" });
+pub const bss_start = @extern(*anyopaque, .{ .name = "_bss_start" });
+pub const bss_end = @extern(*anyopaque, .{ .name = "_bss_end" });
+pub const sbss_start = @extern(*anyopaque, .{ .name = "_sbss_start" });
+pub const sbss_end = @extern(*anyopaque, .{ .name = "_sbss_end" });
+pub const stack_start = @extern(*anyopaque, .{ .name = "_stack_start" });
+pub const stack_end = @extern(*anyopaque, .{ .name = "_stack_end" });
+pub const stvec_stack_start = @extern(*anyopaque, .{ .name = "_stvec_stack_start" });
+pub const stvec_stack_end = @extern(*anyopaque, .{ .name = "_stvec_stack_end" });
+pub const heap_start = @extern(*anyopaque, .{ .name = "_heap_start" });
+pub const heap_end = @extern(*anyopaque, .{ .name = "_heap_end" });
+
+inline fn heapSize() usize {
+ return @intFromPtr(heap_end) - @intFromPtr(heap_start);
+}
+
+pub const page_size: usize = 0x1000; // 4096 bytes
+
+pub var next_mmio_vaddr: usize = 0xff000000;
+
+// Rounds an address up to the next page boundary.
+// Addresses that are already page-aligned are returned unchanged.
+fn pageAlign(addr: usize) usize {
+ return (addr + (page_size - 1)) & ~(page_size - 1);
+}
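+// For example (illustrative values): pageAlign(0x80001234) returns 0x80002000,
+// while pageAlign(0x80002000) is returned unchanged.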
+
+pub const AllocError = error{
+ ZeroSize,
+ OutOfMemory,
+ OutOfRange,
+ DoubleFree,
+ AlreadyTaken,
+};
+
+pub const TableError = error{
+ NotALeaf,
+};
+
+pub const Mode = enum(u4) {
+ bare = 0,
+ sv39 = 8,
+ sv48 = 9,
+};
+
+// SATP register, configures and enables the MMU (and thus paging).
+pub const Satp = packed struct(usize) {
+ pub const Asid = u16;
+
+    // Reverse field order so that @bitCast yields a usize with the correct bit layout.
+    // Without this, writing the value to the SATP register would enable an invalid
+    // page table, leave the MMU disabled, or cause other bugs.
+ ppn: u44,
+ asid: Asid,
+ mode: Mode,
+};
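+// The resulting bit layout matches the Sv39 satp CSR:
+// PPN in bits 0-43, ASID in bits 44-59, MODE in bits 60-63.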
+
+// A page descriptor for use by the heap allocator.
+pub const Page = struct {
+ flags: Flags,
+
+ pub const Flags = packed struct {
+ active: u1,
+ last: u1, // Last page of contiguous allocation
+
+ pub const clear = .{
+ .active = 0,
+ .last = 0,
+ };
+ };
+
+    // Marks a page as taken, optionally flagging it as the last page of an allocation.
+    // Fails with AllocError.AlreadyTaken if the page is already taken.
+ pub fn take(self: *Page, last: bool) !void {
+ if (@bitCast(self.flags.active)) return AllocError.AlreadyTaken;
+
+ self.flags.active = 1;
+ if (last) self.flags.last = 1;
+ }
+};
+
+// Returns the offset from the page base. Works with both physical and virtual addresses.
+// Offsets are never translated.
+fn offsetOf(addr: usize) usize {
+ // Offset is in bottom 12 bits of both physical and virtual addresses.
+ return addr & 0xfff;
+}
+
+// Returns the virtual page numbers of a virtual address by paging level.
+fn virtualPageNumbers(vaddr: usize) [3]usize {
+ // Virtual address format:
+ //
+ // VPN[2] | VPN[1] | VPN[0] | offset
+ // 9 bits | 9 bits | 9 bits | 12 bits
+ //
+ // Virtual page numbers are indexes into the page table of their level,
+ // i.e. VPN[2] is an index to the root page table on level 2
+ // whereas VPN[1] is an index to the page table on level 1 specified by VPN[2].
+ //
+ // Offsets are never translated.
+
+ return [3]usize{
+ (vaddr >> 12) & 0x1ff,
+ (vaddr >> 21) & 0x1ff,
+ (vaddr >> 30) & 0x1ff,
+ };
+}
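+// Worked example (illustrative address): for vaddr 0x8020_1abc the offset is 0xabc
+// and the VPNs are { 0x001, 0x001, 0x002 } for levels 0, 1 and 2 respectively.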
+
+// Returns the physical page numbers of a physical address by paging level.
+fn physicalPageNumbers(paddr: usize) [3]usize {
+ // Physical address format:
+ //
+ // PPN[2] | PPN[1] | PPN[0] | offset
+ // 26 bits | 9 bits | 9 bits | 12 bits
+ //
+ // PPN[i] is what to map VPN[i] to.
+ //
+ // Offsets are never translated.
+
+ return [3]usize{
+ (paddr >> 12) & 0x1ff,
+ (paddr >> 21) & 0x1ff,
+ (paddr >> 30) & 0x3ff_ffff,
+ };
+}
+
+// Returns the page numbers of an address as a single integer.
+fn pageNumber(addr: usize) usize {
+ return addr >> 12;
+}
+
+pub const EntryFlags = packed struct(u8) {
+ valid: u1,
+ read: u1,
+ write: u1,
+ exec: u1,
+ user: u1,
+ global: u1,
+ accessed: u1,
+ dirty: u1,
+
+ pub const branch = EntryFlags{
+ .valid = 1,
+ .read = 0,
+ .write = 0,
+ .exec = 0,
+ .user = 0,
+ .global = 0,
+ .accessed = 0,
+ .dirty = 0,
+ };
+
+ pub const readOnly = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 0,
+ .exec = 0,
+ .user = 0,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 0,
+ };
+
+ pub const readWrite = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 1,
+ .exec = 0,
+ .user = 0,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 1,
+ };
+
+ pub const readExec = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 0,
+ .exec = 1,
+ .user = 0,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 0,
+ };
+
+ pub const userReadOnly = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 0,
+ .exec = 0,
+ .user = 1,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 0,
+ };
+
+ pub const userReadWrite = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 1,
+ .exec = 0,
+ .user = 1,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 1,
+ };
+
+ pub const userReadExec = EntryFlags{
+ .valid = 1,
+ .read = 1,
+ .write = 0,
+ .exec = 1,
+ .user = 1,
+ .global = 0,
+ .accessed = 1,
+ .dirty = 0,
+ };
+
+ pub fn isLeaf(self: EntryFlags) bool {
+ return @bitCast(self.read) or @bitCast(self.write) or @bitCast(self.exec);
+ }
+};
+
+pub const Entry = packed struct(u64) {
+    // Reverse field order so that @bitCast yields a u64 with the correct bit layout.
+    // Without this, writing the value to a page table would create an invalid entry,
+    // resulting in page faults or hangs.
+ flags: EntryFlags,
+ rsw: u2, // Reserved for supervisor use. Currently unused.
+ mapping: u44,
+ reserved: u10,
+
+ // Returns the physical page numbers to map to by paging level.
+ pub fn physicalPageNumbers(self: Entry) [3]usize {
+ // Mapping format:
+ //
+ // PPN[2] | PPN[1] | PPN[0]
+ // 26 bits | 9 bits | 9 bits
+ //
+ // PPN[i] is what to map VPN[i] to.
+
+ return [3]usize{
+ self.mapping & 0x1ff,
+ (self.mapping >> 9) & 0x1ff,
+ (self.mapping >> 18) & 0x3ff_ffff,
+ };
+ }
+
+ pub fn mappingAddr(self: Entry) usize {
+ // Apply an offset of zero since entries always point to an aligned page
+ // and this function should return a usable memory address.
+ // Callers can change the offset if needed.
+        // Widen first so that no bits of the 44-bit mapping are shifted out.
+        return @as(usize, self.mapping) << 12;
+ }
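+    // For example (illustrative value), an entry with mapping 0x80201
+    // refers to physical address 0x8020_1000.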
+
+ pub fn isValid(self: Entry) bool {
+ return @bitCast(self.flags.valid);
+ }
+
+ // Returns whether the entry is a mapping (true) or another page table (false).
+ pub fn isLeaf(self: Entry) bool {
+ return self.flags.isLeaf();
+ }
+};
+
+pub const Table = struct {
+    // Do not add any fields. The unmap function relies on mappings pointing directly
+    // to page tables and casts them to this data structure. This cast becomes invalid
+    // if additional fields are added, especially if they precede the entries field.
+
+ entries: [512]Entry,
+
+ // Create a mapping of a certain virtual page address to a physical page address,
+ // discarding offsets. The mapping is written to the specified level,
+ // creating page tables as needed.
+ //
+ // The mapping must be a leaf, meaning that passing flags
+ // that indicate no access permissions at all will return an error.
+ //
+ // This function internally uses zeroedAlloc to allocate memory for the required page tables,
+ // but assumes that the physical address to map to has already been allocated by the caller.
+ pub fn map(root: *Table, vaddr: usize, paddr: usize, flags: EntryFlags, level: usize) !void {
+ if (!flags.isLeaf()) return TableError.NotALeaf;
+
+ const vpn = virtualPageNumbers(vaddr);
+
+ // Grab the entry in the root (level 2) page table.
+ var v = &root.entries[vpn[2]];
+
+        // Walk the page table levels from high to low under the assumption that root is valid.
+        for (level..2) |iInv| {
+            // Count down so that i is the VPN index of the level being descended into,
+            // from 1 down to the requested level.
+            const i = 1 + level - iInv;
+
+ // If this entry doesn't point to a lower-level page table or memory page yet,
+ // allocate one.
+ if (!v.isValid()) {
+ const page = try zeroedAlloc(1);
+ v.* = .{
+ .flags = EntryFlags.branch,
+ .rsw = 0,
+ .mapping = @intCast(pageNumber(@intFromPtr(page))), // Remove the offset, a mapping is just the PPN.
+ .reserved = 0,
+ };
+ }
+
+ // Get the entries of the existing or newly created page table.
+ // This cast is safe because the only field of a Table is its entries.
+ const table: *Table = @ptrFromInt(v.mappingAddr());
+ // Grab the entry of the table by indexing it according to the corresponding VPN.
+ v = &table.entries[vpn[i]];
+ }
+
+ // Write the actual mapping to the correct table on the requested level.
+ v.* = .{
+ .flags = flags,
+ .rsw = 0,
+ .mapping = @intCast(pageNumber(paddr)), // Remove the offset, a mapping is just the PPN.
+ .reserved = 0,
+ };
+ }
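+    // Usage sketch (illustrative addresses): create a single read/write leaf mapping
+    // on level 0, assuming the target physical page has already been allocated:
+    //
+    //   try root.map(0x4000_0000, 0x8030_0000, EntryFlags.readWrite, 0);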
+
+ // Deallocate child page tables recursively. The provided table itself is not affected,
+ // allowing partial unmapping of multi-level tables.
+ //
+ // This function does not deallocate memory pages mapped by the provided table
+ // or any of its (recursive) children.
+    pub fn unmap(table: *Table) void {
+        for (&table.entries) |*entry| {
+            if (entry.isValid() and !entry.isLeaf()) {
+                // This cast is safe because the only field of a Table is its entries.
+                const lowerLevelTable: *Table = @ptrFromInt(entry.mappingAddr());
+                lowerLevelTable.unmap();
+                entry.flags.valid = 0;
+                // The entry was just invalidated, so allocator errors are ignored here.
+                free(@ptrCast(lowerLevelTable)) catch {};
+            }
+        }
+    }
+
+    // Returns the physical address for a virtual address using the provided level 2 page table.
+    // This can be used to access virtual addresses whose page table isn't active
+    // in the MMU / SATP CSR (Control and Status Register), making it possible
+    // to cleanly access the memory space of a user mode process (as seen from
+    // that process) from supervisor mode.
+    //
+    // A null return value is equivalent to a page fault.
+ pub fn translate(root: *const Table, vaddr: usize) ?usize {
+ const vpn = virtualPageNumbers(vaddr);
+
+ // Grab the entry in the root (level 2) page table.
+ var v = &root.entries[vpn[2]];
+
+ // Walk the page table levels from high to low.
+ for (0..3) |iInv| {
+ const i = 2 - iInv;
+
+ if (!v.isValid()) {
+ break;
+ } else if (v.isLeaf()) {
+ // Mapping found.
+
+ // Create a mask starting directly below / after PN[i].
+ // Since all levels can have leaves i is not guaranteed to be zero.
+ const offsetMask = (@as(usize, 1) << @intCast(12 + 9 * i)) - 1;
+ const offset = vaddr & offsetMask;
+ const ppnJoined = v.mappingAddr() & ~offsetMask;
+
+ return ppnJoined | offset;
+ }
+
+ // Get the entries of the page table of the current level.
+ const entry: *[512]Entry = @ptrFromInt(v.mappingAddr());
+ // Grab the entry of the table by indexing it according to the corresponding VPN.
+ v = &entry[vpn[i - 1]];
+ }
+
+ return null;
+ }
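+    // Usage sketch (user_root and user_vaddr are hypothetical):
+    //
+    //   const paddr = user_root.translate(user_vaddr) orelse @panic("page fault");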
+
+    // Creates an identity mapping for all pages needed for the specified range
+    // using the map function. An identity mapping doesn't actually translate
+    // memory addresses; virtual and physical addresses are the same.
+ //
+ // The start address is inclusive while end is exclusive.
+ //
+ // This is still useful because it can be used to prevent the kernel
+ // from accessing machine-reserved memory by accident.
+ pub fn identityMapRange(root: *Table, start: usize, end: usize, flags: EntryFlags) !void {
+ // Mask out the offset within the starting page.
+ const startPage = start & ~(page_size - 1);
+ // Mask out the offset within the ending page, but ensure the returned page address
+ // is always the last required page for the mapping (end is exclusive,
+ // so subtracting 1 ends up in the previous page on boundaries,
+ // eliminating one useless mapping). The resulting value is inclusive.
+ const endPage = (end - 1) & ~(page_size - 1);
+
+ var page = startPage;
+ while (page <= endPage) : (page += page_size) {
+ try root.map(page, page, flags, 0);
+ }
+ }
+
+ // Constructs the SATP register value needed to activate the specified page table
+ // using the provided Address Space Identifier (ASID).
+ //
+    // The kernel page table always has ASID 0 (a convention of this kernel,
+    // not mandated by the RISC-V specification).
+ pub fn satp(root: *const Table, asid: Satp.Asid) Satp {
+ return .{
+ .ppn = @intCast(pageNumber(@intFromPtr(root))),
+ .asid = asid,
+ .mode = .sv39,
+ };
+ }
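+    // Sketch of activating a table (assumes inline assembly is acceptable at the call site;
+    // an sfence.vma is needed so stale translations are flushed):
+    //
+    //   const value: usize = @bitCast(root.satp(0));
+    //   asm volatile ("csrw satp, %[v]"
+    //       :
+    //       : [v] "r" (value),
+    //   );
+    //   asm volatile ("sfence.vma");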
+
+ pub fn mapKernel(root: *Table) !void {
+ try root.identityMapRange(@intFromPtr(text_start), @intFromPtr(text_end), EntryFlags.readExec);
+ try root.identityMapRange(@intFromPtr(rodata_start), @intFromPtr(rodata_end), EntryFlags.readOnly);
+ try root.identityMapRange(@intFromPtr(data_start), @intFromPtr(data_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(sdata_start), @intFromPtr(sdata_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(bss_start), @intFromPtr(bss_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(sbss_start), @intFromPtr(sbss_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(stack_start), @intFromPtr(stack_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(stvec_stack_start), @intFromPtr(stvec_stack_end), EntryFlags.readWrite);
+ try root.identityMapRange(@intFromPtr(heap_start), @intFromPtr(heap_end), EntryFlags.readWrite);
+ }
+
+ pub fn mapDevice(root: *Table, reg: *hwinfo.Reg) !void {
+ const physical_start = reg.start & ~(page_size - 1);
+ const physical_end = (reg.start + reg.len - 1) & ~(page_size - 1);
+
+ reg.addr = next_mmio_vaddr | (reg.start & (page_size - 1));
+
+ var paddr = physical_start;
+ while (paddr <= physical_end) : (paddr += page_size) {
+ try root.map(next_mmio_vaddr, paddr, EntryFlags.readWrite, 0);
+ next_mmio_vaddr += page_size;
+ }
+ }
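+    // Usage sketch (uart_reg is a hypothetical hwinfo.Reg): after mapping,
+    // the device is accessed through the virtual address assigned to reg.addr:
+    //
+    //   try root.mapDevice(&uart_reg);
+    //   const base: [*]volatile u8 = @ptrFromInt(uart_reg.addr);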
+};
+
+pub fn init() void {
+ const num_pages = heapSize() / page_size;
+ const pages: [*]Page = @ptrCast(heap_start);
+
+ for (0..num_pages) |i| {
+ pages[i].flags = Page.Flags.clear;
+ }
+}
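+// init must run before the first call to alloc or zeroedAlloc
+// so that every page descriptor starts out cleared.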
+
+// Allocate memory pages. Passing n == 0 results in an error.
+pub fn alloc(n: usize) !*void {
+    if (n == 0) return AllocError.ZeroSize;
+
+ const num_pages = heapSize() / page_size;
+ // Start allocating beyond page descriptors.
+ const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+ const pages: [*]Page = @ptrCast(heap_start);
+
+ // Iterate over potential starting points.
+ // The subtraction of n prevents unnecessary iterations for starting points
+ // that don't leave enough space for the whole allocation.
+ for (0..num_pages - n) |i| {
+ if (!@bitCast(pages[i].flags.active)) {
+ // Free starting page found.
+
+ var insufficient = false;
+
+ // Check if there is enough contiguous free space for the whole allocation.
+ // If not, move on to the next potential starting point.
+ for (i..n + i) |j| {
+ if (@bitCast(pages[j].flags.active)) {
+ insufficient = true;
+ break;
+ }
+ }
+
+ if (!insufficient) {
+ // Mark all allocated pages as taken.
+ for (i..n + i - 1) |j| {
+ try pages[j].take(false);
+ }
+ try pages[n + i - 1].take(true);
+
+ // Construct a pointer to the first page using its descriptor number.
+ return @ptrFromInt(alloc_start + i * page_size);
+ }
+ }
+ }
+
+ return AllocError.OutOfMemory;
+}
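+// Usage sketch (illustrative): allocate two contiguous pages and free them again:
+//
+//   const mem = try alloc(2);
+//   defer free(mem) catch {};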
+
+// Free (contiguous) memory page(s). Provides limited protection against double-frees.
+pub fn free(ptr: *void) !void {
+ const num_pages = heapSize() / page_size;
+ // Start allocating beyond page descriptors.
+ const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+    // Recover the address of the page descriptor from the address of its contents
+    // by computing the descriptor number and using it to index the descriptor table
+    // at the start of the heap.
+ const addr = @intFromPtr(heap_start) + (@intFromPtr(ptr) - alloc_start) / page_size;
+
+ // Ensure basic address sanity.
+ // Does not check descriptor table bounds.
+ if (addr < @intFromPtr(heap_start) or addr >= @intFromPtr(heap_start) + heapSize()) return AllocError.OutOfRange;
+
+ var page: [*]Page = @ptrFromInt(addr);
+
+ // Mark all but the last page as free.
+ // A double-free check is performed on the last page before it is freed.
+ while (@bitCast(page[0].flags.active) and !@bitCast(page[0].flags.last)) : (page += 1) {
+ page[0].flags = Page.Flags.clear;
+ }
+
+ // Free page encountered, but it isn't marked as the last. Potential double-free.
+ if (!@bitCast(page[0].flags.last)) return AllocError.DoubleFree;
+
+ // Mark the last page as free.
+ page[0].flags = Page.Flags.clear;
+}
+
+// Allocate memory pages and overwrite their contents with zeroes for added security.
+// Passing n == 0 results in an error.
+pub fn zeroedAlloc(n: usize) !*void {
+ const ret = try alloc(n);
+
+    // Write zeroes in 64-bit batches to reduce the number of store instructions.
+    // Remaining bytes don't need to be accounted for
+    // because page_size (4096) is divisible by 8.
+
+ const size = (n * page_size) / 8;
+ const ptr: [*]volatile u64 = @alignCast(@ptrCast(ret));
+
+ for (0..size) |i| {
+ ptr[i] = 0;
+ }
+
+ return ret;
+}