author | Matthew Schauer <matthew.schauer@e10x.net> | 2020-11-22 12:24:19 -0800 |
---|---|---|
committer | Matthew Schauer <matthew.schauer@e10x.net> | 2021-04-07 21:05:12 -0700 |
commit | 608a26a4f1d9aa880e44c7d56a8f928f53e24b2b (patch) | |
tree | da8afc500d440dfae46f9ec98774b27b1fea459d | |
parent | ae93dc45e82a1e92a9c07c85fdb76d33ce56bf91 (diff) |
Document public interface
-rw-r--r-- | src/lib.rs | 75 |
-rw-r--r-- | src/read.rs | 218 |
-rw-r--r-- | src/write.rs | 235 |
3 files changed, 494 insertions, 34 deletions
@@ -1,3 +1,25 @@ +//! This crate provides Rust bindings for the [squashfs-tools-ng][] library, providing support for +//! SquashFS as an embeddable archive format without the need for kernel support. It also tries to +//! provide a level of safety and abstraction on top of the C library. Cross-platform usability is a +//! secondary goal. +//! +//! # Installation +//! +//! Currently, the underlying [squashfs-tools-ng][] library must be installed on the system both to +//! build and to use this library. The development headers (`/usr/include/sqfs/...`) are required +//! to build, and the shared library (`/usr/lib/libsquashfs.so`) to run. The project's GitHub page +//! asserts that packages are available in many Linux distributions' repositories. +//! +//! Once the dependencies are in place, this should function like most other Rust libraries, and +//! `cargo build` should suffice to build the library. +//! +//! # Usage +//! +//! The [`read`] and [`write`](module@write) modules below provide support for reading and writing +//! SquashFS files, respectively. Check them out for further documentation. +//! +//! [squashfs-tools-ng]: https://github.com/AgentD/squashfs-tools-ng/ + #[macro_use] extern crate lazy_static; extern crate libc; extern crate memmap; @@ -11,7 +33,6 @@ use std::mem::MaybeUninit; use std::ffi::{OsStr, OsString}; use std::path::PathBuf; use std::ptr; -use bindings::*; use num_derive::FromPrimitive; use num_traits::FromPrimitive; use thiserror::Error; @@ -24,33 +45,44 @@ mod bindings { include!(concat!(env!("OUT_DIR"), "/bindings.rs")); } +use bindings::*; + pub mod read; pub mod write; type BoxedError = Box<dyn std::error::Error + std::marker::Send + std::marker::Sync>; +/// Errors raised by the underlying library. +/// +/// This error type reflects all errors raised by the squashfs-tools-ng library. This should +/// always be wrapped in a [`SquashfsError`] before being returned from any of the functions in +/// this library. 
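+///
+/// As a quick sketch of how raw return codes map to this enum (assuming the generated
+/// `SQFS_ERROR_*` constants keep the values that were previously hard-coded here, e.g. -13 for
+/// a missing entry):
+///
+/// use num_traits::FromPrimitive;
+/// // Convert a raw return code from libsquashfs into a typed error.
+/// assert!(matches!(LibError::from_i32(-13), Some(LibError::NoEntry)));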
#[derive(Error, Debug, FromPrimitive)] #[repr(i32)] pub enum LibError { - #[error("Failed to allocate memory")] Alloc = -1, - #[error("Generic I/O failure occurred")] Io = -2, - #[error("Compressor failed to extract data")] Compressor = -3, - #[error("Internal error occurred")] Internal = -4, - #[error("Archive file appears to be corrupted")] Corrupted = -5, - #[error("Unsupported feature used")] Unsupported = -6, - #[error("Archive would overflow memory")] Overflow = -7, - #[error("Out-of-bounds access attempted")] OutOfBounds = -8, - #[error("Superblock magic number incorrect")] SuperMagic = -9, - #[error("Unsupported archive version")] SuperVersion = -10, - #[error("Archive block size is invalid")] SuperBlockSize = -11, - #[error("Not a directory")] NotDir = -12, - #[error("Path does not exist")] NoEntry = -13, - #[error("Hard link loop detected")] LinkLoop = -14, - #[error("Not a regular file")] NotFile = -15, - #[error("Invalid argument passed")] ArgInvalid = -16, - #[error("Library operations performed in incorrect order")] Sequence = -17, -} - + #[error("Failed to allocate memory")] Alloc = SQFS_ERROR_SQFS_ERROR_ALLOC, + #[error("Generic I/O failure occurred")] Io = SQFS_ERROR_SQFS_ERROR_IO, + #[error("Compressor failed to extract data")] Compressor = SQFS_ERROR_SQFS_ERROR_COMPRESSOR, + #[error("Internal error occurred")] Internal = SQFS_ERROR_SQFS_ERROR_INTERNAL, + #[error("Archive file appears to be corrupted")] Corrupted = SQFS_ERROR_SQFS_ERROR_CORRUPTED, + #[error("Unsupported feature used")] Unsupported = SQFS_ERROR_SQFS_ERROR_UNSUPPORTED, + #[error("Archive would overflow memory")] Overflow = SQFS_ERROR_SQFS_ERROR_OVERFLOW, + #[error("Out-of-bounds access attempted")] OutOfBounds = SQFS_ERROR_SQFS_ERROR_OUT_OF_BOUNDS, + #[error("Superblock magic number incorrect")] SuperMagic = SQFS_ERROR_SQFS_ERROR_SUPER_MAGIC, + #[error("Unsupported archive version")] SuperVersion = SQFS_ERROR_SQFS_ERROR_SUPER_VERSION, + #[error("Archive block size is invalid")] SuperBlockSize = SQFS_ERROR_SQFS_ERROR_SUPER_BLOCK_SIZE, + #[error("Not a directory")] NotDir = SQFS_ERROR_SQFS_ERROR_NOT_DIR, + #[error("Path does not exist")] NoEntry = SQFS_ERROR_SQFS_ERROR_NO_ENTRY, + #[error("Hard link loop detected")] LinkLoop = SQFS_ERROR_SQFS_ERROR_LINK_LOOP, + #[error("Not a regular file")] NotFile = SQFS_ERROR_SQFS_ERROR_NOT_FILE, + #[error("Invalid argument passed")] ArgInvalid = SQFS_ERROR_SQFS_ERROR_ARG_INVALID, + #[error("Library operations performed in incorrect order")] Sequence = SQFS_ERROR_SQFS_ERROR_SEQUENCE, +} + +/// Errors encountered while reading or writing an archive. +/// +/// This wraps all errors that might be encountered by the library during its normal course of +/// operation. #[derive(Error, Debug)] pub enum SquashfsError { #[error("Input contains an invalid null character")] NullInput(#[from] std::ffi::NulError), @@ -79,7 +111,8 @@ pub enum SquashfsError { #[error("Tried to add files to a writer that was already finished")] Finished, } -type Result<T> = std::result::Result<T, SquashfsError>; +/// Result type returned by SquashFS library operations. +pub type Result<T> = std::result::Result<T, SquashfsError>; fn sfs_check(code: i32, desc: &str) -> Result<i32> { match code { diff --git a/src/read.rs b/src/read.rs index e708f12..6b2c6fb 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,3 +1,22 @@ +//! Facilities for reading SquashFS archives. +//! +//! The most common scenario for using this library is: +//! +//! 1. To open a SquashFS file, use [`Archive::new`]. +//! 2. 
Call [`get`](Archive::get) to retrieve a [`Node`] by its path. +//! 3. Call [`data`](Node::data) to get a [`Data`] object containing the node's data. +//! +//! `Node` also provides methods for inspecting metadata, resolving symlinks, and conveniently +//! converting to file and directory objects. +//! +//! let archive = Archive::new("archive.sfs")?; +//! match archive.get("/etc/passwd")? { +//! None => println!("File not present"), +//! Some(node) => if let Data::File(mut file) = node.data()? { +//! println!("{}", file.to_string()?); +//! }, +//! } + use std::collections::{HashMap, HashSet}; use std::ffi::{CStr, CString}; use std::io; @@ -32,6 +51,21 @@ fn enoent_ok<T>(t: Result<T>) -> Result<Option<T>> { } } +/// A directory in the archive. +/// +/// Directory objects are obtained by calling the [`data`](Node::data) or [`as_dir`](Node::as_dir) +/// method on a [`Node`] object. `Dir` implements [`Iterator`](std::iter::Iterator), so all +/// children can be retrieved just by iterating over the directory. The iterator can be reset by +/// calling [`reset`](Self::reset). Individual children can also be retrieved by name using +/// [`child`](Self::child). +/// +/// let archive = Archive::new("archive.sfs")?; +/// let node = archive.get("/my-dir")?.expect("/my-dir does not exist").resolve_exists()?; +/// let mut dir = node.as_dir()?; +/// if let Some(child) = dir.child("file.txt")? { +/// println!("Found {:?}", child.path()); +/// } +/// dir.reset(); +/// for entry in dir { +/// println!("{}", entry?.name().unwrap()); +/// } #[derive(Debug)] pub struct Dir<'a> { node: &'a Node<'a>, @@ -49,6 +83,10 @@ impl<'a> Dir<'a> { Ok(Self { node: node, compressor: compressor, reader: Mutex::new(reader) }) } + /// Reset the directory reader to the beginning of the directory. + /// + /// If the directory has been partially or completely iterated through, this will put it back + /// to the beginning so that it can be read again. pub fn reset(&mut self) { unsafe { sqfs_dir_reader_rewind(**self.reader.lock().expect(LOCK_ERR)); } } @@ -69,6 +107,10 @@ impl<'a> Dir<'a> { } } + /// Select a child inside the directory by name. + /// + /// This will return `Ok(None)` if the child does not exist, or an `Err` if the lookup could + /// not be performed. pub fn child(&self, name: &str) -> Result<Option<Node>> { match unsafe { enoent_ok(sfs_check(sqfs_dir_reader_find(**self.reader.lock().expect(LOCK_ERR), CString::new(name)?.as_ptr()), &format!("Couldn't find child \"{}\"", name)))? } { None => Ok(None), @@ -85,6 +127,22 @@ impl<'a> std::iter::Iterator for Dir<'a> { } } +/// A file in the archive. +/// +/// `File` objects allow standard operations on file inodes in an archive. `File` implements +/// [`Read`] and [`Seek`], so anything that reads files using standard Rust semantics can interact +/// natively with these files. [`to_bytes`](Self::to_bytes) and [`to_string`](Self::to_string) +/// offer convenience wrappers around this. Files that were archived with compression and +/// fragmentation disabled can also be [`mmap`](Self::mmap)ed and accessed as an ordinary byte +/// array. +/// +/// let archive = Archive::new("archive.sfs")?; +/// let node = archive.get("/a/01.txt")?.unwrap().resolve_exists()?; +/// let mut file = node.as_file()?; +/// // File can now be used like anything else that implements `Read` and `Seek`. 
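+/// // (This sketch assumes `std::io::{Read, Seek, SeekFrom}` are in scope.)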
+/// let mut buf = [0; 10]; +/// file.seek(SeekFrom::End(-10))?; +/// file.read(&mut buf)?; pub struct File<'a> { node: &'a Node<'a>, #[allow(dead_code)] compressor: ManagedPointer<sqfs_compressor_t>, // Referenced by `reader` @@ -102,24 +160,42 @@ impl<'a> File<'a> { Ok(Self { node: node, compressor: compressor, reader: Mutex::new(reader), offset: Mutex::new(0) }) } + /// Retrieve the size of the file in bytes. pub fn size(&self) -> u64 { let mut ret: u64 = 0; unsafe { sqfs_inode_get_file_size(self.node.inode.as_const(), &mut ret) }; ret } + /// Retrieve the entire contents of the file in the form of a byte Vec. pub fn to_bytes(&mut self) -> Result<Vec<u8>> { let mut ret = Vec::with_capacity(self.size() as usize); self.read_to_end(&mut ret)?; Ok(ret) } + /// Retrieve the entire contents of the file in the form of a String. + /// + /// This calls [`Read::read_to_string`] under the hood. Consequently, a UTF-8 error + /// will be raised if the entire file is not valid UTF-8. pub fn to_string(&mut self) -> Result<String> { let mut ret = String::with_capacity(self.size() as usize); self.read_to_string(&mut ret)?; Ok(ret) } + /// Map a file into memory for fast parallel random access. + /// + /// This uses `mmap` to map the file into memory. **It will fail** and return `None` if the + /// file is compressed or fragmented. If the [`DontCompress`](write::BlockFlags::DontCompress) + /// and [`DontFragment`](write::BlockFlags::DontFragment) options are set for a file at + /// archive creation time, it will be added to the archive in one contiguous unmodified chunk. + /// This is necessary because `mmap` provides a view into a file exactly as it is on-disk; + /// there is no opportunity for the library to apply decompression or other transformations + /// when mapping. + /// + /// let map = file.mmap().expect("File is not mmappable"); + /// println!("{}", str::from_utf8(map)?); pub fn mmap<'b>(&'b mut self) -> Option<&'b [u8]> { let inode = unsafe { &***self.node.inode }; let (start, frag_idx) = unsafe { @@ -174,14 +250,34 @@ impl<'a> std::fmt::Debug for File<'a> { } } +/// Enum type for the various kinds of data that an inode can be. +/// +/// This is retrieved by calling [`Node::data`] and can be matched to determine the type and +/// contents of a node. +/// +/// For accessing files and directories, [`Node`] provides the [`as_dir`](Node::as_dir) and +/// [`as_file`](Node::as_file) methods to bypass `Data` completely. #[derive(Debug)] pub enum Data<'a> { + /// A regular file, containing a [`File`] object that can be used to extract the file contents. File(File<'a>), + + /// A directory, containing a [`Dir`] that can be used to access the directory's children. Dir(Dir<'a>), - Symlink(String), + + /// A symbolic link, containing the target of the link as a [`PathBuf`]. + Symlink(PathBuf), + + /// A block device file, containing the device's major and minor numbers. BlockDev(u32, u32), + + /// A character device file, containing the device's major and minor numbers. CharDev(u32, u32), + + /// A named pipe. Fifo, + + /// A socket. 
Socket, } @@ -198,10 +294,12 @@ impl<'a> Data<'a> { SQFS_INODE_TYPE_SQFS_INODE_DIR | SQFS_INODE_TYPE_SQFS_INODE_EXT_DIR => Ok(Self::Dir(Dir::new(node)?)), SQFS_INODE_TYPE_SQFS_INODE_FILE | SQFS_INODE_TYPE_SQFS_INODE_EXT_FILE => Ok(Self::File(File::new(node)?)), SQFS_INODE_TYPE_SQFS_INODE_SLINK => Ok(unsafe { - Self::Symlink(arr_to_string(&(***node.inode).extra, (***node.inode).data.slink.target_size as usize)) + let path_str = arr_to_string(&(***node.inode).extra, (***node.inode).data.slink.target_size as usize); + Self::Symlink(PathBuf::from(path_str)) }), SQFS_INODE_TYPE_SQFS_INODE_EXT_SLINK => Ok(unsafe { - Self::Symlink(arr_to_string(&(***node.inode).extra, (***node.inode).data.slink_ext.target_size as usize)) + let path_str = arr_to_string(&(***node.inode).extra, (***node.inode).data.slink_ext.target_size as usize); + Self::Symlink(PathBuf::from(path_str)) }), SQFS_INODE_TYPE_SQFS_INODE_BDEV => Ok(unsafe { let (maj, min) = get_dev_nums((***node.inode).data.dev.devno); @@ -225,6 +323,8 @@ impl<'a> Data<'a> { } } + /// Get a human-readable English name for the type of file represented by this object, intended + /// primarily for debugging. pub fn name(&self) -> String { match self { Data::File(_) => "regular file", @@ -238,6 +338,7 @@ impl<'a> Data<'a> { } } +/// Represents the namespace of extended attributes. #[repr(u32)] #[derive(Clone, Copy)] pub enum XattrType { @@ -246,6 +347,17 @@ pub enum XattrType { Security = SQFS_XATTR_TYPE_SQFS_XATTR_SECURITY, } +/// An object packaging a [`File`] with the [`Node`] from which it was constructed. +/// +/// `File`s reference data in the `Node` objects that created them, so a `File` cannot be used +/// after its corresponding `Node` has been dropped. This object packages the two together, +/// creating an object that is valid for the lifetime of the owning `Archive`. +/// +/// This is a simple wrapper around an [`OwningHandle`] that re-implements the [`Read`] and +/// [`Seek`] traits so that it can still be treated as a file. [`Deref`](std::ops::Deref) and +/// [`DerefMut`](std::ops::DerefMut) are also available to access the contained file. +/// +/// Create an `OwnedFile` using [`Node::into_owned_file`]. pub struct OwnedFile<'a> { handle: OwningHandle<Box<Node<'a>>, Box<File<'a>>>, } @@ -276,6 +388,15 @@ impl<'a> std::ops::DerefMut for OwnedFile<'a> { } } +/// An object packaging a [`Dir`] with the [`Node`] from which it was constructed. +/// +/// `Dir`s retain references to the `Node`s that created them, so a `Dir` cannot be used after its +/// corresponding `Node` has been dropped. `OwnedDir` packages the two together, creating an +/// independent object with the same lifetime as its owning `Archive`. +/// +/// `OwnedDir` re-implements [`Iterator`](std::iter::Iterator) so that it can be iterated over just +/// like `Dir`. It also implements [`Deref`](std::ops::Deref) and [`DerefMut`](std::ops::DerefMut) +/// to allow access to the internal `Dir`. pub struct OwnedDir<'a> { handle: OwningHandle<Box<Node<'a>>, Box<Dir<'a>>>, } @@ -302,6 +423,17 @@ impl<'a> std::ops::DerefMut for OwnedDir<'a> { } } +/// Information about a single node in the directory tree. +/// +/// This corresponds to the inode and directory entry structures of the underlying library. +/// Because SquashFS inodes do not retain pointers back to their directory entries, inodes by +/// default have no information about their positions in the directory tree. 
To work around this, +/// `Node` structs store their path and propagate it through calls like [`child`](Dir::child) and +/// [`parent`](Self::parent). If the `Node` was originally constructed in a way that does not +/// provide path information, such as retrieving a node by inode number using [`Archive::get_id`], +/// then the methods that require knowledge of the node's location in the tree, such as +/// [`path`](Self::path) and [`parent`](Self::parent), will fail. For this reason, it is generally +/// recommended to get nodes by path when possible. pub struct Node<'a> { container: &'a Archive, path: Option<PathBuf>, @@ -313,6 +445,7 @@ impl<'a> Node<'a> { Ok(Self { container: container, path: path, inode: Arc::new(inode) }) } + /// Get a node's extended attributes in a given namespace as a map of byte Vecs. pub fn xattrs(&self, category: XattrType) -> Result<HashMap<Vec<u8>, Vec<u8>>> { if self.container.superblock.flags & SQFS_SUPER_FLAGS_SQFS_FLAG_NO_XATTRS as u16 != 0 { Ok(HashMap::new()) } // TODO The following line reflects what I think is a bug. I have a non-xattr archive @@ -353,14 +486,23 @@ impl<'a> Node<'a> { } } + /// Get the inode number of a node. + /// + /// This can be used to cheaply compare nodes for equality or can be later used with + /// [`get_id`](Archive::get_id) to retrieve nodes without traversing the directory tree. pub fn id(&self) -> u32 { unsafe { (***self.inode).base.inode_number } } + /// Retrieve the data stored at the node. pub fn data(&self) -> Result<Data> { Data::new(&self) } + /// Get the absolute path to the node in the archive. + /// + /// If the node was obtained in a way that did not provide path information, this will return + /// `None`. If the node was retrieved using [`Archive::get`], this should return `Some`. pub fn path(&self) -> Option<&Path> { self.path.as_ref().map(|path| path.as_path()) } @@ -372,10 +514,18 @@ impl<'a> Node<'a> { } } + /// A convenience method to retrieve the file name of the node from its path. + /// + /// As with [`path`](Self::path), if the node does not have embedded path information, this + /// will return `None`. pub fn name(&self) -> Option<String> { self.path.as_ref().map(|path| path.file_name().map(|x| x.to_string_lossy().to_string()).unwrap_or("/".to_string())) } + /// Get the parent directory node of the current node. + /// + /// If the node is the root of the tree, it will return a copy of itself. If this node was + /// created without path information, it will raise a [`NoPath`](SquashfsError::NoPath) error. pub fn parent(&self) -> Result<Self> { self.path.as_ref().map(|path| { let ppath = path.parent().unwrap_or(&Path::new("")); @@ -383,6 +533,10 @@ impl<'a> Node<'a> { }).ok_or(SquashfsError::NoPath)? } + /// Resolve symbolic links to their targets, raising an error if a target does not exist. + /// + /// This works the same way as [`resolve`](Self::resolve), except that an error is raised if + /// any link in the chain of symbolic links points at a path that does not exist. pub fn resolve_exists(&self) -> Result<Self> { let mut visited = HashSet::new(); let mut cur = Box::new(self.clone()); @@ -410,10 +564,20 @@ impl<'a> Node<'a> { } } + /// Resolve symbolic links to their targets. + /// + /// This follows the chain of symbolic links starting at the current node all the way to the + /// end, returning the final node, which is guaranteed not to be a symbolic link. If any link + /// in the chain points at a path that does not exist, it returns `Ok(None)`. 
If the current + /// node is not a symbolic link, this returns a copy of itself. pub fn resolve(&self) -> Result<Option<Self>> { enoent_ok(self.resolve_exists()) } + /// Return true if the current `Node` is a file. + /// + /// This does *not* resolve symbolic links, and will return `false` when called on nodes that + /// are symbolic links to files. pub fn is_file(&self) -> Result<bool> { match self.data()? { Data::File(_) => Ok(true), @@ -421,6 +585,11 @@ } } + /// Fetch the [`File`] object from the current `Node`. + /// + /// This is essentially a shortcut for `if let Data::File(file) = self.data()`. If this node + /// is not a regular file, this will return an error. This does *not* resolve symbolic links; + /// the caller should call [`resolve`](Self::resolve) first if the node could be a link. pub fn as_file(&self) -> Result<File> { match self.data()? { Data::File(f) => Ok(f), @@ -428,11 +597,20 @@ } } + /// Convert the `Node` into an [`OwnedFile`]. + /// + /// This resolves symbolic links. If the current node is not a regular file or a link to one, + /// it will return an error. + /// + /// let archive = Archive::new("archive.sfs")?; + /// let mut buf = String::new(); + /// archive.get("/file.txt")?.unwrap().into_owned_file()?.read_to_string(&mut buf)?; pub fn into_owned_file(self) -> Result<OwnedFile<'a>> { let resolved = self.resolve_exists()?; Ok(OwnedFile { handle: OwningHandle::try_new(Box::new(resolved), |x| unsafe { (*x).as_file().map(|x| Box::new(x)) })? }) } + /// Return true if the current `Node` is a directory. pub fn is_dir(&self) -> Result<bool> { match self.data()? { Data::Dir(_) => Ok(true), @@ -440,6 +618,11 @@ } } + /// Fetch the [`Dir`] object from the current `Node`. + /// + /// This is essentially a shortcut for `if let Data::Dir(dir) = self.data()`. If this node is + /// not a directory, it will return an error. This does *not* resolve symbolic links; the + /// caller should call [`resolve`](Self::resolve) first if the node could be a link. pub fn as_dir(&self) -> Result<Dir> { match self.data()? { Data::Dir(d) => Ok(d), @@ -447,25 +630,38 @@ } } + /// Convert the `Node` into an [`OwnedDir`]. + /// + /// This resolves symbolic links. If the current node is not a directory or a link to one, it + /// will return an error. + /// + /// let archive = Archive::new("archive.sfs")?; + /// for child in archive.get("/dir")?.unwrap().into_owned_dir()? { + /// println!("{}", child?.name().unwrap()); + /// } pub fn into_owned_dir(self) -> Result<OwnedDir<'a>> { let resolved = self.resolve_exists()?; Ok(OwnedDir { handle: OwningHandle::try_new(Box::new(resolved), |x| unsafe { (*x).as_dir().map(|x| Box::new(x)) })? }) } + /// Get the UID of the `Node`. pub fn uid(&self) -> Result<u32> { let idx = unsafe { (***self.inode).base.uid_idx }; self.container.id_lookup(idx) } + /// Get the GID of the `Node`. pub fn gid(&self) -> Result<u32> { let idx = unsafe { (***self.inode).base.gid_idx }; self.container.id_lookup(idx) } + /// Get the file mode of the `Node`. pub fn mode(&self) -> u16 { unsafe { (***self.inode).base.mode } } + /// Get the modification time of the `Node` as a UNIX timestamp. pub fn mtime(&self) -> u32 { unsafe { (***self.inode).base.mod_time } } @@ -490,6 +686,7 @@ impl<'a> std::fmt::Debug for Node<'a> { } } +/// An open SquashFS archive. 
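+///
+/// A sketch of typical use (assuming an `archive.sfs` file exists and eliding error handling):
+///
+/// let archive = Archive::new("archive.sfs")?;
+/// println!("{} inodes in {}", archive.size(), archive.path().display());
+/// if let Some(node) = archive.get("/etc/passwd")? {
+///     println!("{}", node.as_file()?.to_string()?);
+/// }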
pub struct Archive { path: PathBuf, file: ManagedPointer<sqfs_file_t>, @@ -499,6 +696,7 @@ pub struct Archive { } impl Archive { + /// Open an existing archive for reading. pub fn new<T: AsRef<Path>>(path: T) -> Result<Self> { let cpath = CString::new(os_to_string(path.as_ref().as_os_str())?)?; let file = sfs_init_check_null(&|| unsafe { @@ -516,7 +714,6 @@ impl Archive { Ok(Self { path: path.as_ref().to_path_buf(), file: file, superblock: superblock, compressor_config: compressor_config, mmap: (os_file, map) }) } - fn compressor(&self) -> Result<ManagedPointer<sqfs_compressor_t>> { Ok(sfs_init_ptr(&|x| unsafe { sqfs_compressor_create(&self.compressor_config, x) @@ -541,14 +738,17 @@ impl Archive { }, "Couldn't get ID from ID table")?) } + /// Retrieve the path that was used to open the archive. pub fn path(&self) -> &Path { &self.path } + /// Get the number of inodes in the archive. pub fn size(&self) -> u32 { self.superblock.inode_count } + /// Get the [`Node`] located at the given path, raising an error if it does not exist. pub fn get_exists<T: AsRef<Path>>(&self, path: T) -> Result<Node> { let compressor = self.compressor()?; let dir_reader = sfs_init_check_null(&|| unsafe { @@ -570,10 +770,20 @@ impl Archive { } } + /// Get the [`Node`] located at the given path in the archive. + /// + /// If the path is not present, `Ok(None)` will be returned. pub fn get<T: AsRef<Path>>(&self, path: T) -> Result<Option<Node>> { enoent_ok(self.get_exists(path)) } + /// Get a node from the archive by its inode number. + /// + /// Each inode in an archive has a unique ID. If the archive was created with the "exportable" + /// option (intended for exporting over NFS), it is efficient to look up inodes by their IDs. + /// If this archive is not exportable, [`SquashfsError::Unsupported`] will be raised. A `Node` + /// obtained in this way will lack path information, and as such operations like getting its + /// file name or parent will fail. pub fn get_id(&self, id: u64) -> Result<Node> { if self.superblock.flags & SQFS_SUPER_FLAGS_SQFS_FLAG_EXPORTABLE as u16 == 0 { Err(SquashfsError::Unsupported("inode indexing".to_string()))?; } if id <= 0 || id > self.superblock.inode_count as u64 { Err(SquashfsError::Range(id, self.superblock.inode_count as u64))? } diff --git a/src/write.rs b/src/write.rs index fb15ed1..6c9c281 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1,3 +1,26 @@ +//! Facilities for writing SquashFS archives. +//! +//! The most straightforward way to write a SquashFS file from a directory tree on-disk is to use a +//! [`TreeProcessor`]. This provides the ability to make "last-minute" modifications to the files +//! that are added, such as skipping certain files or modifying metadata. +//! +//! To create a totally "synthetic" SquashFS file that is not built from files in a filesystem, +//! open a [`Writer`] and feed [`Source`]s to it. +//! +//! # Limitations +//! +//! This library does not yet handle hard links; files with multiple hard links will be archived as +//! separate files with identical contents (which should be deduplicated and end up taking up +//! little additional space). +//! +//! The SquashFS specification includes a field in directory inodes for the parent inode number, +//! presumably to make `..` directory entries work. This is one factor that makes it impossible to +//! build a SquashFS file without building out the entire directory tree to be archived in memory. +//! I have tried as hard as possible to reduce the amount of data that must be stored for each +//! 
node added, and this architecture makes it infeasible to store parent inodes in directory +//! entries. I hope to fix this some day, and in the meantime it has not caused problems in the +//! ways I have used the resultant files. + use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::ffi::{CString, OsString}; @@ -9,33 +32,126 @@ use super::*; use super::SquashfsError; use walkdir::{DirEntry, WalkDir}; +/// Flags to fine-tune how an entry is added to the archive. +/// +/// These values can be ORed together and passed in the [`flags`](Source::flags) field of a +/// [`Source`] object. #[repr(u32)] pub enum BlockFlags { + /// Don't compress file data. + /// + /// By default, files are compressed, and the compressed version is stored in the archive if it + /// is smaller than the uncompressed version. Setting this flag will force the file to be + /// stored uncompressed. DontCompress = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_COMPRESS, + + /// Align the file data to device blocks. + /// + /// If set, padding will be added before and after this file's data blocks so that it is + /// aligned to the blocks of the underlying disk. BlockAlign = super::SQFS_BLK_FLAGS_SQFS_BLK_ALIGN, + + /// Store the tail of the file in a regular data block rather than a fragment block. + /// + /// The compressed content of a file to be written to an archive is split into equally-sized + /// blocks and stored as "data blocks". The final chunk is usually smaller than the rest, so + /// the final chunks from multiple files are collected and stored together in + /// separate "fragment blocks" as an optimization. If there is a reason for the entire file's + /// contents to be stored together, fragmentation can be disabled using this flag. DontFragment = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_FRAGMENT, + + /// Don't deduplicate data blocks for this file. + /// + /// If two files contain an identical data block, the block will be stored only once and both + /// files' block indices will point to this single block. The user can force all blocks of a + /// file to be stored by setting this flag. DontDeduplicate = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_DEDUPLICATE, + + /// Don't elide sparse blocks. + /// + /// If a block of a file contains only zeros, it will not be stored at all and the file's block + /// index will mark that the block is all-zero. This behavior can be disabled so that a zero + /// block will be written by setting this flag. IgnoreSparse = super::SQFS_BLK_FLAGS_SQFS_BLK_IGNORE_SPARSE, + + /// Don't compute block checksums for this file. + /// + /// Each data block is checksummed to verify data integrity unless this flag is set. DontHash = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_HASH, } +/// Represents the data of a filesystem object that can be added to an archive. +/// +/// When creating the archive, this object is read from a [`Source`] (which additionally describes +/// the filesystem attributes of the node) and used to set the type and contents of the node. pub enum SourceData { + /// Create a file with the provided contents. + /// + /// The contained object will be read and its contents placed in the file written to the + /// archive. File(Box<dyn Read + Sync + Send>), + + /// Create a directory with the given children. + /// + /// The creator must provide an iterator of ([`OsString`], `u32`) pairs, which respectively + /// represent the name and inode number of each child of this directory. 
This is one of the + /// hardest parts about writing archive contents -- all children of each directory must be + /// written before the directory itself, so that the inode numbers of the children are known. + /// [`TreeProcessor`] facilitates this by performing a post-order traversal of a filesystem, + /// ensuring that files are written in the correct order. Dir(Box<dyn Iterator<Item=(OsString, u32)> + Sync + Send>), - Symlink(OsString), + + /// Create a symbolic link to the given path. + /// + /// It is not required for the target of the symlink to exist. Symlink(PathBuf), + + /// Create a block device file with the given major and minor device numbers. BlockDev(u32, u32), + + /// Create a character device file with the given major and minor device numbers. CharDev(u32, u32), + + /// Create a named pipe. Fifo, + + /// Create a socket. Socket, } +/// A single node to be added to the SquashFS archive. +/// +/// This contains a [`SourceData`] instance containing the actual data of the node, along with +/// metadata such as permissions and extended attributes. The path to the node is not part of this +/// object, because all information necessary to reconstruct the directory tree is contained in the +/// directory iterators. However, for higher-level mechanisms that abstract away details such as +/// inode numbers, it is helpful to associate a path with each `Source`; [`SourceFile`] is used for +/// this purpose. +/// +/// This object is designed to be constructed by the user by setting all fields to the appropriate +/// values. pub struct Source { + /// The type of the node and the data it contains. pub data: SourceData, + + /// The UID of the file. pub uid: u32, + + /// The GID of the file. pub gid: u32, + + /// The file mode. pub mode: u16, + + /// The modification time of the file as a Unix timestamp. pub modified: u32, + + /// Extended attributes on the node. Each one must start with a valid xattr namespace (such as + /// "user."), and the values can be arbitrary byte strings. pub xattrs: HashMap<OsString, Vec<u8>>, + + /// [`BlockFlags`] to set on the node to control how its contents are archived. Multiple flags + /// can be combined using `|`. pub flags: u32, } @@ -63,6 +179,7 @@ fn copy_metadata(src: &ManagedPointer<sqfs_inode_generic_t>, dst: &mut ManagedPo } impl Source { + /// Construct a `Source` from a `SourceData`, using defaults for all metadata fields. pub fn defaults(data: SourceData) -> Self { Self { data: data, uid: 0, gid: 0, mode: 0x1ff, modified: 0, xattrs: HashMap::new(), flags: 0 } } @@ -71,7 +188,6 @@ impl Source { ((min & 0xfff00) << 20) | ((maj & 0xfff) << 8) | (min & 0xff) } - // TODO Handle hard links unsafe fn to_inode(&self, link_count: u32) -> Result<ManagedPointer<sqfs_inode_generic_t>> { unsafe fn create_inode(kind: SQFS_INODE_TYPE, extra: usize) -> ManagedPointer<sqfs_inode_generic_t> { use std::alloc::{alloc, Layout}; @@ -89,7 +205,7 @@ impl Source { ret }, SourceData::Symlink(dest_os) => { - let dest = os_to_string(&dest_os)?.into_bytes(); + let dest = os_to_string(dest_os.as_os_str())?.into_bytes(); let mut ret = create_inode(SQFS_INODE_TYPE_SQFS_INODE_SLINK, dest.len()); let mut data = &mut (**ret).data.slink; data.nlink = link_count; @@ -133,11 +249,51 @@ struct IntermediateNode { pos: u64, } +/// A [`Source`] bundled with the path where it should be located. 
+/// +/// While the path of a `Source` is not strictly necessary to build the directory tree, it is a +/// useful way for automatic archive builders like [`TreeProcessor`] to keep track of files as they +/// are being added. +/// +/// For purposes where the metadata stored in [`Source`], like permissions and xattrs, is +/// unnecessary, [`defaults`](Self::defaults) can be used to conveniently construct a `SourceFile` +/// from a [`PathBuf`] and [`SourceData`]. pub struct SourceFile { pub path: PathBuf, pub content: Source, } +impl SourceFile { + /// Wrap a path and a `SourceData` in a new `SourceFile`, using defaults for all metadata fields. + /// + /// This sets UID and GID to 0 and permissions to 0o777, gives a null modification time and no + /// xattrs, and sets no flags. + pub fn defaults(path: PathBuf, data: SourceData) -> Self { + Self { path: path, content: Source::defaults(data) } + } +} + +/// A basic SquashFS writer. +/// +/// This provides a simple interface for writing archives. The user calls [`open`](Self::open), +/// [`add`](Self::add) to add each node, and [`finish`](Self::finish) to finish writing. This is +/// intended for writing archives that are generated by code or otherwise not reflected by files in +/// a file system -- if you want to archive a tree of files from disk, [`TreeProcessor`] handles +/// directory tracking so that you don't have to do it yourself. +/// +/// **Each node must be written before its parent**, and an error will be raised if this invariant +/// is not maintained -- however, this is not detected until `finish` is called. +/// +/// let mut writer = Writer::open("archive.sfs")?; +/// let mut ids = HashMap::new(); +/// for i in 0..5 { +/// let content = std::io::Cursor::new(format!("This is the content of file {}.txt.", i).into_bytes()); +/// let source = Source::defaults(SourceData::File(Box::new(content))); +/// let id = writer.add(source)?; +/// ids.insert(OsString::from(format!("{}.txt", i)), id); +/// } +/// writer.add(Source::defaults(SourceData::Dir(Box::new(ids.into_iter()))))?; +/// writer.finish()?; pub struct Writer { outfile: ManagedPointer<sqfs_file_t>, #[allow(dead_code)] compressor_config: sqfs_compressor_config_t, // Referenced by `compressor` @@ -156,6 +312,9 @@ pub struct Writer { } impl Writer { + /// Open a new output file for writing. + /// + /// If the file exists, it will be overwritten. pub fn open<T: AsRef<Path>>(path: T) -> Result<Self> { let cpath = CString::new(os_to_string(path.as_ref().as_os_str())?)?; let block_size = SQFS_DEFAULT_BLOCK_SIZE as u64; @@ -247,6 +406,16 @@ impl Writer { unsafe { (**self.outfile).get_size.expect("Superblock doesn't provide get_size")(*self.outfile) } } + /// Add the provided `Source` to the archive. + /// + /// This writes file data and xattrs to the archive directly, while storing directory tree + /// information to write when `finish` is called. + /// + /// The returned value is the inode number of the added `Source`. If the file is to be added + /// to a directory (that is, almost always), this number needs to be stored so that it can be + /// provided when the directory is added. In the current implementation, inode numbers start + /// at 1 for the first file and count steadily upward, but this behavior may change without + /// warning. 
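+ ///
+ /// A sketch of adding a single file plus a root directory listing it (error handling
+ /// elided; `std::io::Cursor` is just one convenient `Read` impl to feed in bytes):
+ ///
+ /// let data = std::io::Cursor::new(b"hello".to_vec());
+ /// let file_id = writer.add(Source::defaults(SourceData::File(Box::new(data))))?;
+ /// let children = vec![(OsString::from("hello.txt"), file_id)].into_iter();
+ /// writer.add(Source::defaults(SourceData::Dir(Box::new(children))))?;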
pub fn add(&mut self, mut source: Source) -> Result<u32> { let finished = self.finished.read().expect("Poisoned lock"); if *finished { Err(SquashfsError::Finished)?; } @@ -296,6 +465,9 @@ Ok(nodes.len() as u32) } + /// Finish writing the archive and flush all contents to disk. + /// + /// It is an error to call `add` after this has been run. pub fn finish(&mut self) -> Result<()> { *self.finished.write().expect("Poisoned lock") = true; let nodes = self.nodes.lock().expect("Poisoned lock"); @@ -351,17 +523,45 @@ unsafe impl Sync for Writer { } unsafe impl Send for Writer { } +/// Tool to help create an archive from a directory in the filesystem. +/// +/// This wraps a [`Writer`] and takes care of tracking the directory hierarchy as files are added, +/// populating the iterators of [`SourceData::Dir`]s as necessary. +/// +/// To simply create a SquashFS file from a chosen directory, call [`process`](Self::process): +/// +/// TreeProcessor::new("archive.sfs")?.process("/home/me/test")? +/// +/// For more control over the addition process -- for example, to exclude certain files, add +/// extended attributes, ignore errors, or print files as they are added -- use +/// [`iter`](Self::iter) to get an iterator over the directory tree, and then call +/// [`add`](Self::add) on each `SourceFile` yielded after applying any desired transformations. +/// After the iterator finishes, remember to call [`finish`](Self::finish). +/// +/// let processor = TreeProcessor::new("archive.sfs")?; +/// for entry in processor.iter("/home/me/test") { +/// let mut entry = entry?; +/// entry.content.mode = 0x1ff; // Set all nodes to be read/writable by anyone +/// let path = entry.path.clone(); +/// match processor.add(entry) { +/// Ok(id) => println!("{}: {}", id, path.display()), +/// Err(_) => println!("Failed adding {}", path.display()), +/// } +/// } +/// processor.finish()?; pub struct TreeProcessor { - root: PathBuf, writer: Mutex<Writer>, childmap: Mutex<HashMap<PathBuf, BTreeMap<OsString, u32>>>, } impl TreeProcessor { - pub fn new<P: AsRef<Path>>(writer: Writer, root: P) -> Result<Self> { - Ok(Self { root: root.as_ref().to_path_buf(), writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) }) + /// Create a new `TreeProcessor` for an output file. + pub fn new<P: AsRef<Path>>(outfile: P) -> Result<Self> { + let writer = Writer::open(outfile)?; + Ok(Self { writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) }) } + /// Add a new file to the archive. + /// + /// It is not recommended to call this on `SourceFile`s that were not yielded by `iter`. pub fn add(&self, mut source: SourceFile) -> Result<u32> { let mut childmap = self.childmap.lock().expect("Poisoned lock"); if let SourceData::Dir(children) = &mut source.content.data { @@ -377,6 +577,7 @@ Ok(id) } + /// Finish writing the archive. pub fn finish(&self) -> Result<()> { self.writer.lock().expect("Poisoned lock").finish() } @@ -391,7 +592,7 @@ SourceData::File(Box::new(std::fs::File::open(entry.path())?)) } else if metadata.file_type().is_symlink() { - SourceData::Symlink(std::fs::read_link(entry.path())?.into_os_string()) + SourceData::Symlink(std::fs::read_link(entry.path())?) } else { Err(SquashfsError::WriteType(metadata.file_type()))?; @@ -411,12 +612,28 @@ Ok(source) } - pub fn iter<'a>(&'a self) -> TreeIterator<'a> { - let tree = WalkDir::new(&self.root).follow_links(false).contents_first(true); + /// Create an iterator over a directory tree, yielding its nodes in a form suitable to pass to + /// `add`. 
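+ ///
+ /// For example, to leave hidden files out of the archive (a sketch; errors and the final
+ /// `finish` call elided):
+ ///
+ /// for entry in processor.iter("/home/me/test") {
+ ///     let entry = entry?;
+ ///     let hidden = entry.path.file_name()
+ ///         .map(|name| name.to_string_lossy().starts_with('.'))
+ ///         .unwrap_or(false);
+ ///     if !hidden { processor.add(entry)?; }
+ /// }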
+ pub fn iter<'a, P: AsRef<Path>>(&'a self, root: P) -> TreeIterator<'a> { + let tree = WalkDir::new(root).follow_links(false).contents_first(true); TreeIterator { processor: self, tree: tree.into_iter() } } + + /// Add an entire directory tree to the archive, then finish it. + /// + /// This is the most basic, bare-bones way to create a full archive from an existing directory + /// tree. This offers no way to customize the archive or handle errors gracefully. + pub fn process<P: AsRef<Path>>(self, root: P) -> Result<()> { + for entry in self.iter(root) { self.add(entry?)?; } + self.finish()?; + Ok(()) + } } +/// An iterator yielding the nodes in a directory tree in a way suitable for archiving. +/// +/// This is created by a [`TreeProcessor`] and the items yielded are intended to be +/// [`add`](TreeProcessor::add)ed to it. pub struct TreeIterator<'a> { processor: &'a TreeProcessor, tree: walkdir::IntoIter,