Diffstat (limited to 'src/lib/paging.zig')
-rw-r--r-- | src/lib/paging.zig | 557
1 files changed, 557 insertions, 0 deletions
diff --git a/src/lib/paging.zig b/src/lib/paging.zig
new file mode 100644
index 0000000..1e785e7
--- /dev/null
+++ b/src/lib/paging.zig
@@ -0,0 +1,557 @@
+// SPDX-FileCopyrightText: 2024 Himbeer <himbeer@disroot.org>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// This is an implementation of Sv39 paging, meaning that the virtual addresses
+// are 39 bits wide. Sv32 and Sv48 are currently not implemented.
+
+const hwinfo = @import("hwinfo.zig");
+
+// Defined by linker script.
+pub const text_start = @extern(*anyopaque, .{ .name = "_text_start" });
+pub const text_end = @extern(*anyopaque, .{ .name = "_text_end" });
+pub const rodata_start = @extern(*anyopaque, .{ .name = "_rodata_start" });
+pub const rodata_end = @extern(*anyopaque, .{ .name = "_rodata_end" });
+pub const data_start = @extern(*anyopaque, .{ .name = "_data_start" });
+pub const data_end = @extern(*anyopaque, .{ .name = "_data_end" });
+pub const sdata_start = @extern(*anyopaque, .{ .name = "_sdata_start" });
+pub const sdata_end = @extern(*anyopaque, .{ .name = "_sdata_end" });
+pub const bss_start = @extern(*anyopaque, .{ .name = "_bss_start" });
+pub const bss_end = @extern(*anyopaque, .{ .name = "_bss_end" });
+pub const sbss_start = @extern(*anyopaque, .{ .name = "_sbss_start" });
+pub const sbss_end = @extern(*anyopaque, .{ .name = "_sbss_end" });
+pub const stack_start = @extern(*anyopaque, .{ .name = "_stack_start" });
+pub const stack_end = @extern(*anyopaque, .{ .name = "_stack_end" });
+pub const stvec_stack_start = @extern(*anyopaque, .{ .name = "_stvec_stack_start" });
+pub const stvec_stack_end = @extern(*anyopaque, .{ .name = "_stvec_stack_end" });
+pub const heap_start = @extern(*anyopaque, .{ .name = "_heap_start" });
+pub const heap_end = @extern(*anyopaque, .{ .name = "_heap_end" });
+
+inline fn heapSize() usize {
+    return @intFromPtr(heap_end) - @intFromPtr(heap_start);
+}
+
+pub const page_size: usize = 0x1000; // 4096 bytes
+
+pub var next_mmio_vaddr: usize = 0xff000000;
+
+// Aligns an address to the next page boundary.
+// Doesn't change addresses that are already aligned.
+fn pageAlign(addr: usize) usize {
+    return (addr + (page_size - 1)) & ~(page_size - 1);
+}
+
+pub const AllocError = error{
+    ZeroSize,
+    OutOfMemory,
+    OutOfRange,
+    DoubleFree,
+    AlreadyTaken,
+};
+
+pub const TableError = error{
+    NotALeaf,
+};
+
+pub const Mode = enum(u4) {
+    bare = 0,
+    sv39 = 8,
+    sv48 = 9,
+};
+
+// SATP register, configures and enables the MMU (and thus paging).
+pub const Satp = packed struct(usize) {
+    pub const Asid = u16;
+
+    // Reverse field order so that @bitCast yields a usize with the right order.
+    // Without this, writing the value to the SATP register enables an invalid page table,
+    // leaves the MMU disabled or causes other bugs.
+    ppn: u44,
+    asid: Asid,
+    mode: Mode,
+};
+
+// A page descriptor for use by the heap allocator.
+pub const Page = struct {
+    flags: Flags,
+
+    pub const Flags = packed struct {
+        active: u1,
+        last: u1, // Last page of contiguous allocation
+
+        pub const clear = .{
+            .active = 0,
+            .last = 0,
+        };
+    };
+
+    // Marks a page as taken, optionally flagging it as the last page of an allocation.
+    // Fails if the page is already taken,
+    // returning AllocError.AlreadyTaken in that case.
+    pub fn take(self: *Page, last: bool) !void {
+        if (@bitCast(self.flags.active)) return AllocError.AlreadyTaken;
+
+        self.flags.active = 1;
+        if (last) self.flags.last = 1;
+    }
+};
+
+// Returns the offset from the page base. Works with both physical and virtual addresses.
+// Offsets are never translated.
+fn offsetOf(addr: usize) usize {
+    // Offset is in bottom 12 bits of both physical and virtual addresses.
+    return addr & 0xfff;
+}
+
+// Returns the virtual page numbers of a virtual address by paging level.
+fn virtualPageNumbers(vaddr: usize) [3]usize {
+    // Virtual address format:
+    //
+    // VPN[2] | VPN[1] | VPN[0] | offset
+    // 9 bits | 9 bits | 9 bits | 12 bits
+    //
+    // Virtual page numbers are indexes into the page table of their level,
+    // i.e. VPN[2] is an index to the root page table on level 2
+    // whereas VPN[1] is an index to the page table on level 1 specified by VPN[2].
+    //
+    // Offsets are never translated.
+
+    return [3]usize{
+        (vaddr >> 12) & 0x1ff,
+        (vaddr >> 21) & 0x1ff,
+        (vaddr >> 30) & 0x1ff,
+    };
+}
+
+// Returns the physical page numbers of a physical address by paging level.
+fn physicalPageNumbers(paddr: usize) [3]usize {
+    // Physical address format:
+    //
+    // PPN[2]  | PPN[1] | PPN[0] | offset
+    // 26 bits | 9 bits | 9 bits | 12 bits
+    //
+    // PPN[i] is what to map VPN[i] to.
+    //
+    // Offsets are never translated.
+
+    return [3]usize{
+        (paddr >> 12) & 0x1ff,
+        (paddr >> 21) & 0x1ff,
+        (paddr >> 30) & 0x3ff_ffff,
+    };
+}
+
+// Returns the page numbers of an address as a single integer.
+fn pageNumber(addr: usize) usize {
+    return addr >> 12;
+}
+
+pub const EntryFlags = packed struct(u8) {
+    valid: u1,
+    read: u1,
+    write: u1,
+    exec: u1,
+    user: u1,
+    global: u1,
+    accessed: u1,
+    dirty: u1,
+
+    pub const branch = EntryFlags{
+        .valid = 1,
+        .read = 0,
+        .write = 0,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 0,
+        .dirty = 0,
+    };
+
+    pub const readOnly = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const readWrite = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 1,
+        .exec = 0,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 1,
+    };
+
+    pub const readExec = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 1,
+        .user = 0,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const userReadOnly = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 0,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub const userReadWrite = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 1,
+        .exec = 0,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 1,
+    };
+
+    pub const userReadExec = EntryFlags{
+        .valid = 1,
+        .read = 1,
+        .write = 0,
+        .exec = 1,
+        .user = 1,
+        .global = 0,
+        .accessed = 1,
+        .dirty = 0,
+    };
+
+    pub fn isLeaf(self: EntryFlags) bool {
+        return @bitCast(self.read) or @bitCast(self.write) or @bitCast(self.exec);
+    }
+};
+
+pub const Entry = packed struct(u64) {
+    // Reverse field order so that @bitCast yields a u64 with the right order.
+    // Without this, writing the value to a page table creates an invalid entry,
+    // thus resulting in page faults or hanging.
+    flags: EntryFlags,
+    rsw: u2, // Reserved for supervisor use. Currently unused.
+    mapping: u44,
+    reserved: u10,
+
+    // Returns the physical page numbers to map to by paging level.
+    pub fn physicalPageNumbers(self: Entry) [3]usize {
+        // Mapping format:
+        //
+        // PPN[2]  | PPN[1] | PPN[0]
+        // 26 bits | 9 bits | 9 bits
+        //
+        // PPN[i] is what to map VPN[i] to.
+
+        return [3]usize{
+            self.mapping & 0x1ff,
+            (self.mapping >> 9) & 0x1ff,
+            (self.mapping >> 18) & 0x3ff_ffff,
+        };
+    }
+
+    pub fn mappingAddr(self: Entry) usize {
+        // Apply an offset of zero since entries always point to an aligned page
+        // and this function should return a usable memory address.
+        // Callers can change the offset if needed.
+        return self.mapping << 12;
+    }
+
+    pub fn isValid(self: Entry) bool {
+        return @bitCast(self.flags.valid);
+    }
+
+    // Returns whether the entry is a mapping (true) or another page table (false).
+    pub fn isLeaf(self: Entry) bool {
+        return self.flags.isLeaf();
+    }
+};
+
+pub const Table = struct {
+    // Do not add any fields. The unmap function relies on mappings pointing to page tables,
+    // casting them to this data structure. This cast becomes invalid if additional fields
+    // are added, especially if they precede the entries field.
+
+    entries: [512]Entry,
+
+    // Create a mapping of a certain virtual page address to a physical page address,
+    // discarding offsets. The mapping is written to the specified level,
+    // creating page tables as needed.
+    //
+    // The mapping must be a leaf, meaning that passing flags
+    // that indicate no access permissions at all will return an error.
+    //
+    // This function internally uses zeroedAlloc to allocate memory for the required page tables,
+    // but assumes that the physical address to map to has already been allocated by the caller.
+    pub fn map(root: *Table, vaddr: usize, paddr: usize, flags: EntryFlags, level: usize) !void {
+        if (!flags.isLeaf()) return TableError.NotALeaf;
+
+        const vpn = virtualPageNumbers(vaddr);
+
+        // Grab the entry in the root (level 2) page table.
+        var v = &root.entries[vpn[2]];
+
+        // Walk the page table levels from high to low under the assumption that root is valid.
+        for (level..2) |iInv| {
+            const i = 1 - iInv;
+
+            // If this entry doesn't point to a lower-level page table or memory page yet,
+            // allocate one.
+            if (!v.isValid()) {
+                const page = try zeroedAlloc(1);
+                v.* = .{
+                    .flags = EntryFlags.branch,
+                    .rsw = 0,
+                    .mapping = @intCast(pageNumber(@intFromPtr(page))), // Remove the offset, a mapping is just the PPN.
+                    .reserved = 0,
+                };
+            }
+
+            // Get the entries of the existing or newly created page table.
+            // This cast is safe because the only field of a Table is its entries.
+            const table: *Table = @ptrFromInt(v.mappingAddr());
+            // Grab the entry of the table by indexing it according to the corresponding VPN.
+            v = &table.entries[vpn[i]];
+        }
+
+        // Write the actual mapping to the correct table on the requested level.
+        v.* = .{
+            .flags = flags,
+            .rsw = 0,
+            .mapping = @intCast(pageNumber(paddr)), // Remove the offset, a mapping is just the PPN.
+            .reserved = 0,
+        };
+    }
+
+    // Deallocate child page tables recursively. The provided table itself is not affected,
+    // allowing partial unmapping of multi-level tables.
+    //
+    // This function does not deallocate memory pages mapped by the provided table
+    // or any of its (recursive) children.
+    pub fn unmap(table: *Table) void {
+        for (table.entries) |entry| {
+            if (entry.isValid() and !entry.isLeaf()) {
+                // This cast is safe because the only field of a Table is its entries.
+                const lowerLevelTable: *Table = @ptrFromInt(entry.mappingAddr());
+                lowerLevelTable.unmap();
+                entry.flags.valid = 0;
+                free(lowerLevelTable);
+            }
+        }
+    }
+
+    // Returns the physical address for a virtual address using the provided level 2 page table.
+    // This can be used to access virtual addresses whose page table isn't active
+    // in the MMU / SATP CSR (Control and Status Register), making it possible
+    // to access the memory space of a user mode process (from its perspective)
+    // from supervisor mode cleanly.
+    //
+    // The absence of a return value is equivalent to a page fault.
+    pub fn translate(root: *const Table, vaddr: usize) ?usize {
+        const vpn = virtualPageNumbers(vaddr);
+
+        // Grab the entry in the root (level 2) page table.
+        var v = &root.entries[vpn[2]];
+
+        // Walk the page table levels from high to low.
+        for (0..3) |iInv| {
+            const i = 2 - iInv;
+
+            if (!v.isValid()) {
+                break;
+            } else if (v.isLeaf()) {
+                // Mapping found.
+
+                // Create a mask starting directly below / after PN[i].
+                // Since all levels can have leaves, i is not guaranteed to be zero.
+                const offsetMask = (@as(usize, 1) << @intCast(12 + 9 * i)) - 1;
+                const offset = vaddr & offsetMask;
+                const ppnJoined = v.mappingAddr() & ~offsetMask;
+
+                return ppnJoined | offset;
+            }
+
+            // Get the entries of the page table of the current level.
+            const entry: *[512]Entry = @ptrFromInt(v.mappingAddr());
+            // Grab the entry of the table by indexing it according to the corresponding VPN.
+            v = &entry[vpn[i - 1]];
+        }
+
+        return null;
+    }
+
+    // Creates an identity mapping for all pages needed for the specified range
+    // using the map function. An identity mapping doesn't actually translate
+    // memory addresses; virtual and physical addresses are the same.
+    //
+    // The start address is inclusive while end is exclusive.
+    //
+    // This is still useful because it can be used to prevent the kernel
+    // from accessing machine-reserved memory by accident.
+    pub fn identityMapRange(root: *Table, start: usize, end: usize, flags: EntryFlags) !void {
+        // Mask out the offset within the starting page.
+        const startPage = start & ~(page_size - 1);
+        // Mask out the offset within the ending page, but ensure the returned page address
+        // is always the last required page for the mapping (end is exclusive,
+        // so subtracting 1 ends up in the previous page on boundaries,
+        // eliminating one useless mapping). The resulting value is inclusive.
+        const endPage = (end - 1) & ~(page_size - 1);
+
+        var page = startPage;
+        while (page <= endPage) : (page += page_size) {
+            try root.map(page, page, flags, 0);
+        }
+    }
+
+    // Constructs the SATP register value needed to activate the specified page table
+    // using the provided Address Space Identifier (ASID).
+    //
+    // The kernel page table always has ASID 0 (not mandated by the RISC-V specification).
+    pub fn satp(root: *const Table, asid: Satp.Asid) Satp {
+        return .{
+            .ppn = @intCast(pageNumber(@intFromPtr(root))),
+            .asid = asid,
+            .mode = .sv39,
+        };
+    }
+
+    pub fn mapKernel(root: *Table) !void {
+        try root.identityMapRange(@intFromPtr(text_start), @intFromPtr(text_end), EntryFlags.readExec);
+        try root.identityMapRange(@intFromPtr(rodata_start), @intFromPtr(rodata_end), EntryFlags.readOnly);
+        try root.identityMapRange(@intFromPtr(data_start), @intFromPtr(data_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(sdata_start), @intFromPtr(sdata_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(bss_start), @intFromPtr(bss_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(sbss_start), @intFromPtr(sbss_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(stack_start), @intFromPtr(stack_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(stvec_stack_start), @intFromPtr(stvec_stack_end), EntryFlags.readWrite);
+        try root.identityMapRange(@intFromPtr(heap_start), @intFromPtr(heap_end), EntryFlags.readWrite);
+    }
+
+    pub fn mapDevice(root: *Table, reg: *hwinfo.Reg) !void {
+        const physical_start = reg.start & ~(page_size - 1);
+        const physical_end = (reg.start + reg.len - 1) & ~(page_size - 1);
+
+        reg.addr = next_mmio_vaddr | (reg.start & (page_size - 1));
+
+        var paddr = physical_start;
+        while (paddr <= physical_end) : (paddr += page_size) {
+            try root.map(next_mmio_vaddr, paddr, EntryFlags.readWrite, 0);
+            next_mmio_vaddr += page_size;
+        }
+    }
+};
+
+pub fn init() void {
+    const num_pages = heapSize() / page_size;
+    const pages: [*]Page = @ptrCast(heap_start);
+
+    for (0..num_pages) |i| {
+        pages[i].flags = Page.Flags.clear;
+    }
+}
+
+// Allocate memory pages. Passing n <= 0 results in an error.
+pub fn alloc(n: usize) !*void {
+    if (n <= 0) return AllocError.ZeroSize;
+
+    const num_pages = heapSize() / page_size;
+    // Start allocating beyond page descriptors.
+    const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+    const pages: [*]Page = @ptrCast(heap_start);
+
+    // Iterate over potential starting points.
+    // The subtraction of n prevents unnecessary iterations for starting points
+    // that don't leave enough space for the whole allocation.
+    for (0..num_pages - n) |i| {
+        if (!@bitCast(pages[i].flags.active)) {
+            // Free starting page found.
+
+            var insufficient = false;
+
+            // Check if there is enough contiguous free space for the whole allocation.
+            // If not, move on to the next potential starting point.
+            for (i..n + i) |j| {
+                if (@bitCast(pages[j].flags.active)) {
+                    insufficient = true;
+                    break;
+                }
+            }
+
+            if (!insufficient) {
+                // Mark all allocated pages as taken.
+                for (i..n + i - 1) |j| {
+                    try pages[j].take(false);
+                }
+                try pages[n + i - 1].take(true);
+
+                // Construct a pointer to the first page using its descriptor number.
+                return @ptrFromInt(alloc_start + i * page_size);
+            }
+        }
+    }
+
+    return AllocError.OutOfMemory;
+}
+
+// Free (contiguous) memory page(s). Provides limited protection against double-frees.
+pub fn free(ptr: *void) !void {
+    const num_pages = heapSize() / page_size;
+    // Start allocating beyond page descriptors.
+    const alloc_start = pageAlign(@intFromPtr(heap_start) + num_pages * @sizeOf(Page));
+
+    // Recover the address of the page descriptor from the address of its contents
+    // by computing the descriptor number and using it to index the descriptor table
+    // at the start of the heap.
+    const addr = @intFromPtr(heap_start) + (@intFromPtr(ptr) - alloc_start) / page_size;
+
+    // Ensure basic address sanity.
+    // Does not check descriptor table bounds.
+    if (addr < @intFromPtr(heap_start) or addr >= @intFromPtr(heap_start) + heapSize()) return AllocError.OutOfRange;
+
+    var page: [*]Page = @ptrFromInt(addr);
+
+    // Mark all but the last page as free.
+    // A double-free check is performed on the last page before it is freed.
+    while (@bitCast(page[0].flags.active) and !@bitCast(page[0].flags.last)) : (page += 1) {
+        page[0].flags = Page.Flags.clear;
+    }
+
+    // Free page encountered, but it isn't marked as the last. Potential double-free.
+    if (!@bitCast(page[0].flags.last)) return AllocError.DoubleFree;
+
+    // Mark the last page as free.
+    page[0].flags = Page.Flags.clear;
+}
+
+// Allocate memory pages and overwrite their contents with zeroes for added security.
+// Passing n <= 0 results in an error.
+pub fn zeroedAlloc(n: usize) !*void {
+    const ret = try alloc(n);
+
+    // Write zeroes in 64-bit batches to reduce the number of store instructions.
+    // The remainder / remaining bytes don't need to be accounted for
+    // because page_size (4096) is divisible by 8.
+
+    const size = (n * page_size) / 8;
+    const ptr: [*]volatile u64 = @alignCast(@ptrCast(ret));
+
+    for (0..size) |i| {
+        ptr[i] = 0;
+    }
+
+    return ret;
+}
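As a usage illustration (not part of this commit), the sketch below shows how the module could be wired up during early kernel boot: initialize the page descriptors, allocate a zeroed root table, identity map the kernel, then write the SATP value returned by Table.satp. The import path, the setupPaging name and the inline csrw/sfence.vma sequence are assumptions made for the example, not APIs defined by this file.

// Hypothetical boot-time sketch; assumes supervisor mode and that this file
// is importable as "lib/paging.zig".
const paging = @import("lib/paging.zig");

fn setupPaging() !*paging.Table {
    // Clear all page descriptors at the start of the heap.
    paging.init();

    // Allocate one zeroed page to hold the root (level 2) page table.
    const root: *paging.Table = @alignCast(@ptrCast(try paging.zeroedAlloc(1)));

    // Identity map the kernel sections so execution continues at the same
    // addresses once translation is enabled.
    try root.mapKernel();

    // Build the SATP value for ASID 0 (kernel) and activate Sv39 paging.
    const satp_value: usize = @bitCast(root.satp(0));
    asm volatile ("csrw satp, %[value]"
        :
        : [value] "r" (satp_value),
    );
    // Flush any stale address translations.
    asm volatile ("sfence.vma");

    return root;
}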