Diffstat (limited to 'src/lib/paging.zig')
-rw-r--r-- | src/lib/paging.zig | 557
1 files changed, 557 insertions, 0 deletions
diff --git a/src/lib/paging.zig b/src/lib/paging.zig
new file mode 100644
index 0000000..1e785e7
--- /dev/null
+++ b/src/lib/paging.zig
@@ -0,0 +1,557 @@
+// SPDX-FileCopyrightText: 2024 Himbeer <himbeer@disroot.org>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// This is an implementation of Sv39 paging, meaning that the virtual addresses
+// are 39 bits wide. Sv32 and Sv48 are currently not implemented.
+
+const hwinfo = @import("hwinfo.zig");
+
+// Defined by linker script.
+pub const text_start = @extern(*anyopaque, .{ .name = "_text_start" });
+pub const text_end = @extern(*anyopaque, .{ .name = "_text_end" });
+pub const rodata_start = @extern(*anyopaque, .{ .name = "_rodata_start" });
+pub const rodata_end = @extern(*anyopaque, .{ .name = "_rodata_end" });
+pub const data_start = @extern(*anyopaque, .{ .name = "_data_start" });
+pub const data_end = @extern(*anyopaque, .{ .name = "_data_end" });
+pub const sdata_start = @extern(*anyopaque, .{ .name = "_sdata_start" });
+pub const sdata_end = @extern(*anyopaque, .{ .name = "_sdata_end" });
+pub const bss_start = @extern(*anyopaque, .{ .name = "_bss_start" });
+pub const bss_end = @extern(*anyopaque, .{ .name = "_bss_end" });
+pub const sbss_start = @extern(*anyopaque, .{ .name = "_sbss_start" });
+pub const sbss_end = @extern(*anyopaque, .{ .name = "_sbss_end" });
+pub const stack_start = @extern(*anyopaque, .{ .name = "_stack_start" });
+pub const stack_end = @extern(*anyopaque, .{ .name = "_stack_end" });
+pub const stvec_stack_start = @extern(*anyopaque, .{ .name = "_stvec_stack_start" });
+pub const stvec_stack_end = @extern(*anyopaque, .{ .name = "_stvec_stack_end" });
+pub const heap_start = @extern(*anyopaque, .{ .name = "_heap_start" });
+pub const heap_end = @extern(*anyopaque, .{ .name = "_heap_end" });
+
+inline fn heapSize() usize {
+    return @intFromPtr(heap_end) - @intFromPtr(heap_start);
+}
+
+pub const page_size: usize = 0x1000; // 4096 bytes
+
+pub var next_mmio_vaddr: usize = 0xff000000;
+
+// Aligns an address to the next page boundary.
+// Doesn't change addresses that are already aligned.
+fn pageAlign(addr: usize) usize {
+    return (addr + (page_size - 1)) & ~(page_size - 1);
+}
+
+pub const AllocError = error{
+    ZeroSize,
+    OutOfMemory,
+    OutOfRange,
+    DoubleFree,
+    AlreadyTaken,
+};
+
+pub const TableError = error{
+    NotALeaf,
+};
+
+pub const Mode = enum(u4) {
+    bare = 0,
+    sv39 = 8,
+    sv48 = 9,
+};
+
+// SATP register, configures and enables the MMU (and thus paging).
+pub const Satp = packed struct(usize) {
+    pub const Asid = u16;
+
+    // Reverse field order so that @bitCast yields a usize with the right order.
+    // Without this, writing the value to the SATP register enables an invalid page table,
+    // leaves the MMU disabled or causes other bugs.
+    ppn: u44,
+    asid: Asid,
+    mode: Mode,
+};
+
+// A page descriptor for use by the heap allocator.
+pub const Page = struct {
+    flags: Flags,
+
+    pub const Flags = packed struct {
+        active: u1,
+        last: u1, // Last page of contiguous allocation
+
+        pub const clear = .{
+            .active = 0,
+            .last = 0,
+        };
+    };
+
+    // Marks a page as taken, optionally flagging it as the last page of an allocation.
+    // Fails if the page is already taken,
+    // returning AllocError.AlreadyTaken in that case.
+    pub fn take(self: *Page, last: bool) !void {
+        if (@bitCast(self.flags.active)) return AllocError.AlreadyTaken;
+
+        self.flags.active = 1;
+        if (last) self.flags.last = 1;
+    }
+};
+
+// Returns the offset from the page base. Works with both physical and virtual addresses.
+// Offsets are never translated.
+fn offsetOf(addr: usize) usize {
+    // Offset is in bottom 12 bits of both physical and virtual addresses.
+    return addr & 0xfff;
+}
+
+// Returns the virtual page numbers of a virtual address by paging level.
+fn virtualPageNumbers(vaddr: usize) [3]usize {
+    // Virtual address format:
+    //
+    // VPN[2] | VPN[1] | VPN[0] | offset
+    // 9 bits | 9 bits | 9 bits | 12 bits
+    //
+    // Virtual page numbers are indexes into the page table of their level,
+    // i.e. VPN[2] is an index to the root page table on level 2
+    // whereas VPN[1] is an index to the page table on level 1 specified by VPN[2].
+    //
+    // Offsets are never translated.
+
+    return [3]usize{
+        (vaddr >> 12) & 0x1ff,
+        (vaddr >> 21) & 0x1ff,
+        (vaddr >> 30) & 0x1ff,
+    };
+}
+
+// Returns the physical page numbers of a physical address by paging level.
+fn physicalPageNumbers(paddr: usize) [3]usize {
+    // Physical address format:
+    //
+    // PPN[2]  | PPN[1] | PPN[0] | offset
+    // 26 bits | 9 bits | 9 bits | 12 bits
+    //
+    // PPN[i] is what to map VPN[i] to.
+    //
+    // Offsets are never translated.
+
+    return [3]usize{
+        (paddr >> 12) & 0x1ff,
+        (paddr >> 21) & 0x1ff,
+        (paddr >> 30) & 0x3ff_ffff,
+    };
+}
+
+// Returns the page numbers of an address as a single integer.
+fn pageNumber(addr: usize) usize {
+    return addr >> 12;
+}
+
+pub const EntryFlags = packed struct(u8) {
+    valid: u1,
+    read: u1,
+    write: u1,
+    exec: u1,
+    user: u1,
+    global: u1,
+    accessed: u1,
+    dirty: u1,
+
+    pub const branch = EntryFlags{
+        .valid = 1,
+        .read = 0,
+        .write = 0,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 0,
+        .dirty = 0,
+    };
+
+    pub const readOnly = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const readWrite = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 1,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 1,
+    };
+
+    pub const readExec = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 1,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const userReadOnly = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 0,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const userReadWrite = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 1,
+        .exec = 0,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 1,
+    };
+
+    pub const userReadExec = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 1,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub fn isLeaf(self: EntryFlags) bool {
+        return @bitCast(self.read) or @bitCast(self.write) or @bitCast(self.exec);
+    }
+};
+
+pub const Entry = packed struct(u64) {
+    // Reverse field order so that @bitCast yields a u64 with the right order.
+    // Without this, writing the value to a page table creates an invalid entry,
+    // thus resulting in page faults or hanging.
+    flags: EntryFlags,
+    rsw: u2, // Reserved for supervisor use. Currently unused.
+    mapping: u44,
+    reserved: u10,
+
+    // Returns the physical page numbers to map to by paging level.
+    pub fn physicalPageNumbers(self: Entry) [3]usize {
+        // Mapping format:
+        //
+        // PPN[2]  | PPN[1] | PPN[0]
+        // 26 bits | 9 bits | 9 bits
+        //
+        // PPN[i] is what to map VPN[i] to.
+
+        return [3]usize{
+            self.mapping & 0x1ff,
+            (self.mapping >> 9) & 0x1ff,
+            (self.mapping >> 18) & 0x3ff_ffff,
+        };
+    }
+
+    pub fn mappingAddr(self: Entry) usize {
+        // Apply an offset of zero since entries always point to an aligned page
+        // and this function should return a usable memory address.
+        // Callers can change the offset if needed.
+        return self.mapping << 12;
+    }
+
+    pub fn isValid(self: Entry) bool {
+        return @bitCast(self.flags.valid);
+    }
+
+    // Returns whether the entry is a mapping (true) or another page table (false).
+    pub fn isLeaf(self: Entry) bool {
+        return self.flags.isLeaf();
+    }
+};
+
+pub const Table = struct {
+    // Do not add any fields. The unmap function relies on mappings pointing to page tables,
+    // casting them to this data structure. This cast becomes invalid if additional fields
+    // are added, especially if they precede the entries field.
+
+    entries: [512]Entry,
+
+    // Create a mapping of a certain virtual page address to a physical page address,
+    // discarding offsets. The mapping is written to the specified level,
+    // creating page tables as needed.
+    //
+    // The mapping must be a leaf, meaning that passing flags
+    // that indicate no access permissions at all will return an error.
+    //
+    // This function internally uses zeroedAlloc to allocate memory for the required page tables,
+    // but assumes that the physical address to map to has already been allocated by the caller.
+    pub fn map(root: *Table, vaddr: usize, paddr: usize, flags: EntryFlags, level: usize) !void {
+        if (!flags.isLeaf()) return TableError.NotALeaf;
+
+        const vpn = virtualPageNumbers(vaddr);
+
+        // Grab the entry in the root (level 2) page table.
+        var v = &root.entries[vpn[2]];
+
+        // Walk the page table levels from high to low under the assumption that root is valid.
+        for (level..2) |iInv| {
+            const i = 1 - iInv;
+
+            // If this entry doesn't point to a lower-level page table or memory page yet,
+            // allocate one.
+            if (!v.isValid()) {
+                const page = try zeroedAlloc(1);
+                v.* = .{
+                    .flags = EntryFlags.branch,
+                    .rsw = 0,
+                    .mapping = @intCast(pageNumber(@intFromPtr(page))), // Remove the offset, a mapping is just the PPN.
+                    .reserved = 0,
+                };
+            }
+
+            // Get the entries of the existing or newly created page table.
+            // This cast is safe because the only field of a Table is its entries.
+            const table: *Table = @ptrFromInt(v.mappingAddr());
+            // Grab the entry of the table by indexing it according to the corresponding VPN.
+            v = &table.entries[vpn[i]];
+        }
+
+        // Write the actual mapping to the correct table on the requested level.
+        v.* = .{
+            .flags = flags,
+            .rsw = 0,
+            .mapping = @intCast(pageNumber(paddr)), // Remove the offset, a mapping is just the PPN.
+            .reserved = 0,
+        };
+    }
+
+    // Deallocate child page tables recursively. The provided table itself is not affected,
+    // allowing partial unmapping of multi-level tables.
+    //
+    // This function does not deallocate memory pages mapped by the provided table
+    // or any of its (recursive) children.
+    pub fn unmap(table: *Table) void {
+        for (table.entries) |entry| {
+            if (entry.isValid() and !entry.isLeaf()) {
+                // This cast is safe because the only field of a Table is its entries.
+                const lowerLevelTable: *Table = @ptrFromInt(entry.mappingAddr());
+                lowerLevelTable.unmap();
+                entry.flags.valid = 0;
+                free(lowerLevelTable);
+            }
+        }
+    }
+
+    // Returns the physical address for a virtual address using the provided level 2 page table.
+    // This can be used to access virtual addresses whose page table isn't active
+    // in the MMU / SATP CSR (Control and Status Register), making it possible
+    // to access the memory space of a user mode process (from its perspective)
+    // from supervisor mode cleanly.
+    //
+    // The absence of a return value is equivalent to a page fault.
+    pub fn translate(root: *const Table, vaddr: usize) ?usize {
+        const vpn = virtualPageNumbers(vaddr);
+
+        // Grab the entry in the root (level 2) page table.
+        var v = &root.entries[vpn[2]];
+
+        // Walk the page table levels from high to low.
+        for (0..3) |iInv| {
+            const i = 2 - iInv;
+
+            if (!v.isValid()) {
+                break;
+            } else if (v.isLeaf()) {
+                // Mapping found.
+
+                // Create a mask starting directly below / after PN[i].
+                // Since all levels can have leaves, i is not guaranteed to be zero.
+                const offsetMask = (@as(usize, 1) << @intCast(12 + 9 * i)) - 1;
+                const offset = vaddr & offsetMask;
+                const ppnJoined = v.mappingAddr() & ~offsetMask;
+
+                return ppnJoined | offset;
+            }
+
+            // Get the entries of the page table of the current level.
+            const entry: *[512]Entry = @ptrFromInt(v.mappingAddr());
+            // Grab the entry of the table by indexing it according to the corresponding VPN.
+            v = &entry[vpn[i - 1]];
+        }
+
+        return null;
+    }
+
+    // Creates an identity mapping for all pages needed for the specified range
+    // using the map function. An identity mapping doesn't actually translate
+    // memory addresses; virtual and physical addresses are the same.
+    //
+    // The start address is inclusive while end is exclusive.
+    //
+    // This is still useful because it can be used to prevent the kernel
+    // from accessing machine-reserved memory by accident.
+    pub fn identityMapRange(root: *Table, start: usize, end: usize, flags: EntryFlags) !void {
+        // Mask out the offset within the starting page.
+        const startPage = start & ~(page_size - 1);
+        // Mask out the offset within the ending page, but ensure the returned page address
+        // is always the last required page for the mapping (end is exclusive,
+        // so subtracting 1 ends up in the previous page on boundaries,
+        // eliminating one useless mapping). The resulting value is inclusive.
+        const endPage = (end - 1) & ~(page_size - 1);
+
+        var page = startPage;
+        while (page <= endPage) : (page += page_size) {
+            try root.map(page, page, flags, 0);
+        }
+    }
+
+    // Constructs the SATP register value needed to activate the specified page table
+    // using the provided Address Space Identifier (ASID).
+    //
+    // The kernel page table always has ASID 0 (not mandated by the RISC-V specification).
+    pub fn satp(root: *const Table, asid: Satp.Asid) Satp {
+        return .{
+            .ppn = @intCast(pageNumber(@intFromPtr(root))),
+            .asid = asid,
+            .mode = .sv39,
+        };
+    }
+
+    pub fn mapKernel(root: *Table) !void {
+        try root.identityMapRange(@intFromPtr(text_start), @intFromPtr(text_end), EntryFlags.readExec);
+        try root.identityMapRange(@intFromPtr(rodata_start), @intFromPtr(rodata_end), EntryFlags.readOnly);
+        try root.identityMapRange(@intFromPtr(data_start), @intFromPtr(data_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(sdata_start), @intFromPtr(sdata_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(bss_start), @intFromPtr(bss_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(sbss_start), @intFromPtr(sbss_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(stack_start), @intFromPtr(stack_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(stvec_stack_start), @intFromPtr(stvec_stack_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(heap_start), @intFromPtr(heap_end), EntryFlags.readWrite);
+    }
+
+    pub fn mapDevice(root: *Table, reg: *hwinfo.Reg) !void {
+        const physical_start = reg.start & ~(page_size - 1);
+        const physical_end = (reg.start + reg.len - 1) & ~(page_size - 1);
+
+        reg.addr = next_mmio_vaddr | (reg.start & (page_size - 1));
+
+        var paddr = physical_start;
+        while (paddr <= physical_end) : (paddr += page_size) {
+            try root.map(next_mmio_vaddr, paddr, EntryFlags.readWrite, 0);
+            next_mmio_vaddr += page_size;
+        }
+    }
+};
+
+pub fn init() void {
+    const num_pages = heapSize() / page_size;
+    const pages: [*]Page = @ptrCast(heap_start);
+
+    for (0..num_pages) |i| {
+        pages[i].flags = Page.Flags.clear;
+    }
+}
+
+// Allocate memory pages. Passing n <= 0 results in an error.
+pub fn alloc(n: usize) !*void {
+    if (n <= 0) return AllocError.ZeroSize;
+
+    const num_pages = heapSize() / page_size;
+    // Start allocating beyond page descriptors.
+    const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+    const pages: [*]Page = @ptrCast(heap_start);
+
+    // Iterate over potential starting points.
+    // The subtraction of n prevents unnecessary iterations for starting points
+    // that don't leave enough space for the whole allocation.
+    for (0..num_pages - n) |i| {
+        if (!@bitCast(pages[i].flags.active)) {
+            // Free starting page found.
+
+            var insufficient = false;
+
+            // Check if there is enough contiguous free space for the whole allocation.
+            // If not, move on to the next potential starting point.
+            for (i..n + i) |j| {
+                if (@bitCast(pages[j].flags.active)) {
+                    insufficient = true;
+                    break;
+                }
+            }
+
+            if (!insufficient) {
+                // Mark all allocated pages as taken.
+                for (i..n + i - 1) |j| {
+                    try pages[j].take(false);
+                }
+                try pages[n + i - 1].take(true);
+
+                // Construct a pointer to the first page using its descriptor number.
+                return @ptrFromInt(alloc_start + i * page_size);
+            }
+        }
+    }
+
+    return AllocError.OutOfMemory;
+}
+
+// Free (contiguous) memory page(s). Provides limited protection against double-frees.
+pub fn free(ptr: *void) !void {
+    const num_pages = heapSize() / page_size;
+    // Start allocating beyond page descriptors.
+    const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+    // Recover the address of the page descriptor from the address of its contents
+    // by computing the descriptor number and using it to index the descriptor table
+    // at the start of the heap.
+    const addr = @intFromPtr(heap_start) + (@intFromPtr(ptr) - alloc_start) / page_size;
+
+    // Ensure basic address sanity.
+    // Does not check descriptor table bounds.
+    if (addr < @intFromPtr(heap_start) or addr >= @intFromPtr(heap_start) + heapSize()) return AllocError.OutOfRange;
+
+    var page: [*]Page = @ptrFromInt(addr);
+
+    // Mark all but the last page as free.
+    // A double-free check is performed on the last page before it is freed.
+    while (@bitCast(page[0].flags.active) and !@bitCast(page[0].flags.last)) : (page += 1) {
+        page[0].flags = Page.Flags.clear;
+    }
+
+    // Free page encountered, but it isn't marked as the last. Potential double-free.
+    if (!@bitCast(page[0].flags.last)) return AllocError.DoubleFree;
+
+    // Mark the last page as free.
+    page[0].flags = Page.Flags.clear;
+}
+
+// Allocate memory pages and overwrite their contents with zeroes for added security.
+// Passing n <= 0 results in an error.
+pub fn zeroedAlloc(n: usize) !*void {
+    const ret = try alloc(n);
+
+    // Write zeroes in 64-bit batches to reduce the number of store instructions.
+    // The remainder / remaining bytes don't need to be accounted for
+    // because page_size (4096) is divisible by 8.
+
+    const size = (n * page_size) / 8;
+    const ptr: [*]volatile u64 = @alignCast(@ptrCast(ret));
+
+    for (0..size) |i| {
+        ptr[i] = 0;
+    }
+
+    return ret;
+}
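As a usage illustration (not part of this commit), the sketch below shows how the module could be wired up during early kernel boot: initialize the page descriptors, allocate a zeroed root table, identity map the kernel, then write the SATP value returned by Table.satp. The import path, the setupPaging name and the inline csrw/sfence.vma sequence are assumptions made for the example, not APIs defined by this file.

// Hypothetical boot-time sketch; assumes supervisor mode and that this file
// is importable as "lib/paging.zig".
const paging = @import("lib/paging.zig");

fn setupPaging() !*paging.Table {
    // Clear all page descriptors at the start of the heap.
    paging.init();

    // Allocate one zeroed page to hold the root (level 2) page table.
    const root: *paging.Table = @alignCast(@ptrCast(try paging.zeroedAlloc(1)));

    // Identity map the kernel sections so execution continues at the same
    // addresses once translation is enabled.
    try root.mapKernel();

    // Build the SATP value for ASID 0 (kernel) and activate Sv39 paging.
    const satp_value: usize = @bitCast(root.satp(0));
    asm volatile ("csrw satp, %[value]"
        :
        : [value] "r" (satp_value),
    );
    // Flush any stale address translations.
    asm volatile ("sfence.vma");

    return root;
}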