author	Matthew Schauer <matthew.schauer@e10x.net>	2020-11-22 12:24:19 -0800
committer	Matthew Schauer <matthew.schauer@e10x.net>	2021-04-07 21:05:12 -0700
commit	608a26a4f1d9aa880e44c7d56a8f928f53e24b2b (patch)
tree	da8afc500d440dfae46f9ec98774b27b1fea459d
parent	ae93dc45e82a1e92a9c07c85fdb76d33ce56bf91 (diff)
Document public interface
-rw-r--r--	src/lib.rs	75
-rw-r--r--	src/read.rs	218
-rw-r--r--	src/write.rs	235
3 files changed, 494 insertions, 34 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 5c737d4..d284f3b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,25 @@
+//! This crate provides Rust bindings for the [squashfs-tools-ng][] library, providing support for
+//! SquashFS as an embeddable archive format without the need for kernel support. It also tries to
+//! provide a level of safety and abstraction on top of the C library. Cross-platform usability is a
+//! secondary goal.
+//!
+//! # Installation
+//!
+//! Currently, the underlying [squashfs-tools-ng][] library must be installed on the system both to
+//! build and to use this library. The development headers (`/usr/include/sqfs/...`) are required
+//! to build, and the shared library (`/usr/lib/libsquashfs.so`) to run. The project's GitHub page
+//! asserts that packages are available in many Linux distributions' repositories.
+//!
+//! Once the dependencies are in place, this should function like most other Rust libraries, and
+//! `cargo build` should suffice to build the library.
+//!
+//! # Usage
+//!
+//! The [`read`] and [`write`](module@write) modules below provide support for reading and writing
+//! SquashFS files, respectively. Check them out for further documentation.
+//!
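+//! As a minimal sketch (hypothetical paths; imports and error handling elided), reading one file
+//! out of an archive looks like this:
+//!
+//!     let archive = read::Archive::new("archive.sfs")?;
+//!     if let Some(node) = archive.get("/etc/hostname")? {
+//!         let mut contents = String::new();
+//!         node.into_owned_file()?.read_to_string(&mut contents)?;
+//!     }
+//!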
+//! [squashfs-tools-ng]: https://github.com/AgentD/squashfs-tools-ng/
+
#[macro_use] extern crate lazy_static;
extern crate libc;
extern crate memmap;
@@ -11,7 +33,6 @@ use std::mem::MaybeUninit;
use std::ffi::{OsStr, OsString};
use std::path::PathBuf;
use std::ptr;
-use bindings::*;
use num_derive::FromPrimitive;
use num_traits::FromPrimitive;
use thiserror::Error;
@@ -24,33 +45,44 @@ mod bindings {
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}
+use bindings::*;
+
pub mod read;
pub mod write;
type BoxedError = Box<dyn std::error::Error + std::marker::Send + std::marker::Sync>;
+/// Errors raised by the underlying library.
+///
+/// This error type reflects all errors raised by the squashfs-tools-ng library. This should
+/// always be wrapped in a [`SquashfsError`] before being returned from any of the functions in
+/// this library.
#[derive(Error, Debug, FromPrimitive)]
#[repr(i32)]
pub enum LibError {
- #[error("Failed to allocate memory")] Alloc = -1,
- #[error("Generic I/O failure occurred")] Io = -2,
- #[error("Compressor failed to extract data")] Compressor = -3,
- #[error("Internal error occurred")] Internal = -4,
- #[error("Archive file appears to be corrupted")] Corrupted = -5,
- #[error("Unsupported feature used")] Unsupported = -6,
- #[error("Archive would overflow memory")] Overflow = -7,
- #[error("Out-of-bounds access attempted")] OutOfBounds = -8,
- #[error("Superblock magic number incorrect")] SuperMagic = -9,
- #[error("Unsupported archive version")] SuperVersion = -10,
- #[error("Archive block size is invalid")] SuperBlockSize = -11,
- #[error("Not a directory")] NotDir = -12,
- #[error("Path does not exist")] NoEntry = -13,
- #[error("Hard link loop detected")] LinkLoop = -14,
- #[error("Not a regular file")] NotFile = -15,
- #[error("Invalid argument passed")] ArgInvalid = -16,
- #[error("Library operations performed in incorrect order")] Sequence = -17,
-}
-
+ #[error("Failed to allocate memory")] Alloc = SQFS_ERROR_SQFS_ERROR_ALLOC,
+ #[error("Generic I/O failure occurred")] Io = SQFS_ERROR_SQFS_ERROR_IO,
+ #[error("Compressor failed to extract data")] Compressor = SQFS_ERROR_SQFS_ERROR_COMPRESSOR,
+ #[error("Internal error occurred")] Internal = SQFS_ERROR_SQFS_ERROR_INTERNAL,
+ #[error("Archive file appears to be corrupted")] Corrupted = SQFS_ERROR_SQFS_ERROR_CORRUPTED,
+ #[error("Unsupported feature used")] Unsupported = SQFS_ERROR_SQFS_ERROR_UNSUPPORTED,
+ #[error("Archive would overflow memory")] Overflow = SQFS_ERROR_SQFS_ERROR_OVERFLOW,
+ #[error("Out-of-bounds access attempted")] OutOfBounds = SQFS_ERROR_SQFS_ERROR_OUT_OF_BOUNDS,
+	#[error("Superblock magic number incorrect")] SuperMagic = SQFS_ERROR_SQFS_ERROR_SUPER_MAGIC,
+	#[error("Unsupported archive version")] SuperVersion = SQFS_ERROR_SQFS_ERROR_SUPER_VERSION,
+ #[error("Archive block size is invalid")] SuperBlockSize = SQFS_ERROR_SQFS_ERROR_SUPER_BLOCK_SIZE,
+ #[error("Not a directory")] NotDir = SQFS_ERROR_SQFS_ERROR_NOT_DIR,
+ #[error("Path does not exist")] NoEntry = SQFS_ERROR_SQFS_ERROR_NO_ENTRY,
+ #[error("Hard link loop detected")] LinkLoop = SQFS_ERROR_SQFS_ERROR_LINK_LOOP,
+ #[error("Not a regular file")] NotFile = SQFS_ERROR_SQFS_ERROR_NOT_FILE,
+ #[error("Invalid argument passed")] ArgInvalid = SQFS_ERROR_SQFS_ERROR_ARG_INVALID,
+ #[error("Library operations performed in incorrect order")] Sequence = SQFS_ERROR_SQFS_ERROR_SEQUENCE,
+}
+
+/// Errors encountered while reading or writing an archive.
+///
+/// This wraps all errors that might be encountered by the library during its normal course of
+/// operation.
#[derive(Error, Debug)]
pub enum SquashfsError {
#[error("Input contains an invalid null character")] NullInput(#[from] std::ffi::NulError),
@@ -79,7 +111,8 @@ pub enum SquashfsError {
#[error("Tried to add files to a writer that was already finished")] Finished,
}
-type Result<T> = std::result::Result<T, SquashfsError>;
+/// Result type returned by SquashFS library operations.
+pub type Result<T> = std::result::Result<T, SquashfsError>;
fn sfs_check(code: i32, desc: &str) -> Result<i32> {
match code {
diff --git a/src/read.rs b/src/read.rs
index e708f12..6b2c6fb 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -1,3 +1,22 @@
+//! Facilities for reading SquashFS archives.
+//!
+//! The most common scenario for using this library is:
+//!
+//! 1. To open a SquashFS file, use [`Archive::new`].
+//! 2. Call [`get`](Archive::get) to retrieve a [`Node`] by its path.
+//! 3. Call [`data`](Node::data) to get a [`Data`] object containing the node's data.
+//!
+//! `Node` also provides methods for inspecting metadata, resolving symlinks, and conveniently
+//! converting to file and directory objects.
+//!
+//! For example:
+//!
+//!     let archive = Archive::new("archive.sfs")?;
+//!     match archive.get("/etc/passwd")? {
+//!         None => println!("File not present"),
+//!         Some(node) => if let Data::File(mut file) = node.data()? {
+//!             println!("{}", file.to_string()?);
+//!         },
+//!     }
+
use std::collections::{HashMap, HashSet};
use std::ffi::{CStr, CString};
use std::io;
@@ -32,6 +51,21 @@ fn enoent_ok<T>(t: Result<T>) -> Result<Option<T>> {
}
}
+/// A directory in the archive.
+///
+/// Directory objects are obtained by calling the [`data`](Node::data) or [`as_dir`](Node::as_dir)
+/// method on a [`Node`] object. `Dir` implements [`Iterator`](std::iter::Iterator), so all
+/// children can be retrieved just by iterating over the directory. The iterator can be reset by
+/// calling [`reset`](Self::reset). Individual children can also be retrieved by name using
+/// [`child`](Self::child).
+///
+///     let archive = Archive::new("archive.sfs")?;
+///     let node = archive.get("/my-dir")?.expect("/my-dir does not exist").resolve_exists()?;
+///     let dir = node.as_dir()?;
+///     let child = dir.child("file.txt")?.expect("/my-dir/file.txt does not exist");
+///     for entry in dir {
+///         println!("{}", entry?.name().unwrap());
+///     }
#[derive(Debug)]
pub struct Dir<'a> {
node: &'a Node<'a>,
@@ -49,6 +83,10 @@ impl<'a> Dir<'a> {
Ok(Self { node: node, compressor: compressor, reader: Mutex::new(reader) })
}
+ /// Reset the directory reader to the beginning of the directory.
+ ///
+ /// If the directory has been partially or completely iterated through, this will put it back
+ /// to the beginning so that it can be read again.
pub fn reset(&mut self) {
unsafe { sqfs_dir_reader_rewind(**self.reader.lock().expect(LOCK_ERR)); }
}
@@ -69,6 +107,10 @@ impl<'a> Dir<'a> {
}
}
+ /// Select a child inside the directory by name.
+ ///
+ /// This will return `Ok(None)` if the child does not exist, or an `Err` if the lookup could
+ /// not be performed.
pub fn child(&self, name: &str) -> Result<Option<Node>> {
match unsafe { enoent_ok(sfs_check(sqfs_dir_reader_find(**self.reader.lock().expect(LOCK_ERR), CString::new(name)?.as_ptr()), &format!("Couldn't find child \"{}\"", name)))? } {
None => Ok(None),
@@ -85,6 +127,22 @@ impl<'a> std::iter::Iterator for Dir<'a> {
}
}
+/// A file in the archive.
+///
+/// `File` objects allow standard operations on file inodes in an archive. `File` implements
+/// [`Read`] and [`Seek`], so anything that reads files using standard Rust semantics can interact
+/// natively with these files. [`to_bytes`](Self::to_bytes) and [`to_string`](Self::to_string)
+/// offer convenience wrappers around this. Files that were archived with compression and
+/// fragmentation disabled can also be [`mmap`](Self::mmap)ed and accessed as an ordinary byte
+/// array.
+///
+///     let archive = Archive::new("archive.sfs")?;
+///     let node = archive.get("/a/01.txt")?.unwrap().resolve_exists()?;
+///     let mut file = node.as_file()?;
+///     // `file` can now be used like anything else that implements `Read` and `Seek`.
+///     let mut buf = [0; 10];
+///     file.seek(SeekFrom::End(-10))?;
+///     file.read(&mut buf)?;
pub struct File<'a> {
node: &'a Node<'a>,
#[allow(dead_code)] compressor: ManagedPointer<sqfs_compressor_t>, // Referenced by `reader`
@@ -102,24 +160,42 @@ impl<'a> File<'a> {
Ok(Self { node: node, compressor: compressor, reader: Mutex::new(reader), offset: Mutex::new(0) })
}
+ /// Retrieve the size of the file in bytes.
pub fn size(&self) -> u64 {
let mut ret: u64 = 0;
unsafe { sqfs_inode_get_file_size(self.node.inode.as_const(), &mut ret) };
ret
}
+ /// Retrieve the entire contents of the file in the form of a byte Vec.
pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
let mut ret = Vec::with_capacity(self.size() as usize);
self.read_to_end(&mut ret)?;
Ok(ret)
}
+ /// Retrieve the entire contents of the file in the form of a String.
+ ///
+ /// This calls [`Read::read_to_string`] under the hood. Consequently, a UTF-8 error
+ /// will be raised if the entire file is not valid UTF-8.
pub fn to_string(&mut self) -> Result<String> {
let mut ret = String::with_capacity(self.size() as usize);
self.read_to_string(&mut ret)?;
Ok(ret)
}
+ /// Map a file into memory for fast parallel random access.
+ ///
+ /// This uses `mmap` to map the file into memory. **It will fail** and return `None` if the
+ /// file is compressed or fragmented. If the [`DontCompress`](write::BlockFlags::DontCompress)
+ /// and [`DontFragment`](write::BlockFlags::DontFragment) options are set for a file at
+ /// archive creation time, it will be added to the archive in one contiguous unmodified chunk.
+ /// This is necessary because `mmap` provides a view into a file exactly as it is on-disk;
+ /// there is no opportunity for the library to apply decompression or other transformations
+ /// when mapping.
+ ///
+	///     let map = file.mmap().expect("File is not mmappable");
+	///     println!("{}", str::from_utf8(map)?);
pub fn mmap<'b>(&'b mut self) -> Option<&'b [u8]> {
let inode = unsafe { &***self.node.inode };
let (start, frag_idx) = unsafe {
@@ -174,14 +250,34 @@ impl<'a> std::fmt::Debug for File<'a> {
}
}
+/// Enum type for the various kinds of data that an inode can hold.
+///
+/// This is retrieved by calling [`Node::data`] and can be matched to determine the type and
+/// contents of a node.
+///
+/// For accessing files and directories, [`Node`] provides the [`as_dir`](Node::as_dir) and
+/// [`as_file`](Node::as_file) methods to bypass `Data` completely.
#[derive(Debug)]
pub enum Data<'a> {
+ /// A regular file, containing a [`File`] object that can be used to extract the file contents.
File(File<'a>),
+
+ /// A directory, containing a [`Dir`] that can be used to access the directory's children.
Dir(Dir<'a>),
- Symlink(String),
+
+ /// A symbolic link, containing the target of the link as a [`PathBuf`].
+ Symlink(PathBuf),
+
+ /// A block device file, containing the device's major and minor numbers.
BlockDev(u32, u32),
+
+ /// A character device file, containing the device's major and minor numbers.
CharDev(u32, u32),
+
+ /// A named pipe.
Fifo,
+
+ /// A socket.
Socket,
}
@@ -198,10 +294,12 @@ impl<'a> Data<'a> {
SQFS_INODE_TYPE_SQFS_INODE_DIR | SQFS_INODE_TYPE_SQFS_INODE_EXT_DIR => Ok(Self::Dir(Dir::new(node)?)),
SQFS_INODE_TYPE_SQFS_INODE_FILE | SQFS_INODE_TYPE_SQFS_INODE_EXT_FILE => Ok(Self::File(File::new(node)?)),
SQFS_INODE_TYPE_SQFS_INODE_SLINK => Ok(unsafe {
- Self::Symlink(arr_to_string(&(***node.inode).extra, (***node.inode).data.slink.target_size as usize))
+ let path_str = arr_to_string(&(***node.inode).extra, (***node.inode).data.slink.target_size as usize);
+ Self::Symlink(PathBuf::from(path_str))
}),
SQFS_INODE_TYPE_SQFS_INODE_EXT_SLINK => Ok(unsafe {
- Self::Symlink(arr_to_string(&(***node.inode).extra, (***node.inode).data.slink_ext.target_size as usize))
+ let path_str = arr_to_string(&(***node.inode).extra, (***node.inode).data.slink_ext.target_size as usize);
+ Self::Symlink(PathBuf::from(path_str))
}),
SQFS_INODE_TYPE_SQFS_INODE_BDEV => Ok(unsafe {
let (maj, min) = get_dev_nums((***node.inode).data.dev.devno);
@@ -225,6 +323,8 @@ impl<'a> Data<'a> {
}
}
+ /// Get a human-readable English name for the type of file represented by this object, intended
+ /// primarily for debugging.
pub fn name(&self) -> String {
match self {
Data::File(_) => "regular file",
@@ -238,6 +338,7 @@ impl<'a> Data<'a> {
}
}
+/// Represents the namespace of extended attributes.
#[repr(u32)]
#[derive(Clone, Copy)]
pub enum XattrType {
@@ -246,6 +347,17 @@ pub enum XattrType {
Security = SQFS_XATTR_TYPE_SQFS_XATTR_SECURITY,
}
+/// An object packaging a [`File`] with the [`Node`] from which it was constructed.
+///
+/// `File`s reference data in the `Node` objects that created them, so a `File` cannot be used
+/// after its corresponding `Node` has been dropped. This object packages the two together,
+/// creating an object that is valid for the lifetime of the owning `Archive`.
+///
+/// This is a simple wrapper around an [`OwningHandle`] that re-implements the [`Read`] and
+/// [`Seek`] traits so that it can still be treated as a file. [`Deref`](std::ops::Deref) and
+/// [`DerefMut`](std::ops::DerefMut) are also available to access the contained file.
+///
+/// Create an `OwnedFile` using [`Node::into_owned_file`].
pub struct OwnedFile<'a> {
handle: OwningHandle<Box<Node<'a>>, Box<File<'a>>>,
}
@@ -276,6 +388,15 @@ impl<'a> std::ops::DerefMut for OwnedFile<'a> {
}
}
+/// An object packaging a [`Dir`] with the [`Node`] from which it was constructed.
+///
+/// `Dir`s retain references to the `Node`s that created them, so a `Dir` cannot be used after its
+/// corresponding `Node` has been dropped. `OwnedDir` packages the two together, creating an
+/// independent object with the same lifetime as its owning `Archive`.
+///
+/// `OwnedDir` re-implements [`Iterator`](std::iter::Iterator) so that it can be iterated over just
+/// like `Dir`. It also implements [`Deref`](std::ops::Deref) and [`DerefMut`](std::ops::DerefMut)
+/// to allow access to the internal `Dir`.
pub struct OwnedDir<'a> {
handle: OwningHandle<Box<Node<'a>>, Box<Dir<'a>>>,
}
@@ -302,6 +423,17 @@ impl<'a> std::ops::DerefMut for OwnedDir<'a> {
}
}
+/// Information about a single node in the directory tree.
+///
+/// This corresponds to the inode and directory entry structures of the underlying library.
+/// Because SquashFS inodes do not retain pointers back to their directory entries, inodes by
+/// default have no information about their positions in the directory tree. To work around this,
+/// `Node` structs store their path and propagate it through calls like [`child`](Dir::child) and
+/// [`parent`](Self::parent). If the `Node` was originally constructed in a way that does not
+/// provide path information, such as retrieving a node by inode number using [`Archive::get_id`],
+/// then the methods that require knowledge of the node's location in the tree, such as
+/// [`path`](Self::path) and [`parent`](Self::parent), will fail. For this reason, it is generally
+/// recommended to get nodes by path when possible.
pub struct Node<'a> {
container: &'a Archive,
path: Option<PathBuf>,
@@ -313,6 +445,7 @@ impl<'a> Node<'a> {
Ok(Self { container: container, path: path, inode: Arc::new(inode) })
}
+ /// Get a node's extended attributes in a given namespace as a map of byte Vecs.
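+	///
+	/// A short sketch (the attribute key is hypothetical and shown without its namespace prefix):
+	///
+	///     let attrs = node.xattrs(XattrType::User)?;
+	///     if let Some(value) = attrs.get("comment".as_bytes()) {
+	///         println!("{}", String::from_utf8_lossy(value));
+	///     }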
pub fn xattrs(&self, category: XattrType) -> Result<HashMap<Vec<u8>, Vec<u8>>> {
if self.container.superblock.flags & SQFS_SUPER_FLAGS_SQFS_FLAG_NO_XATTRS as u16 != 0 { Ok(HashMap::new()) }
// TODO The following line reflects what I think is a bug. I have a non-xattr archive
@@ -353,14 +486,23 @@ impl<'a> Node<'a> {
}
}
+ /// Get the inode number of a node.
+ ///
+ /// This can be used to cheaply compare nodes for equality or can be later used with
+ /// [`get_id`](Archive::get_id) to retrieve nodes without traversing the directory tree.
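+	///
+	/// For example, given two nodes `node_a` and `node_b`, comparison by inode number is cheap:
+	///
+	///     if node_a.id() == node_b.id() {
+	///         println!("Same inode");
+	///     }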
pub fn id(&self) -> u32 {
unsafe { (***self.inode).base.inode_number }
}
+ /// Retrieve the data stored at the node.
pub fn data(&self) -> Result<Data> {
Data::new(&self)
}
+ /// Get the absolute path to the node in the archive.
+ ///
+ /// If the node was obtained in a way that did not provide path information, this will return
+ /// `None`. If the node was retrieved using [`Archive::get`], this should return `Some`.
pub fn path(&self) -> Option<&Path> {
self.path.as_ref().map(|path| path.as_path())
}
@@ -372,10 +514,18 @@ impl<'a> Node<'a> {
}
}
+ /// A convenience method to retrieve the file name of the node from its path.
+ ///
+ /// As with [`path`](Self::path), if the node does not have embedded path information, this
+ /// will return `None`.
pub fn name(&self) -> Option<String> {
self.path.as_ref().map(|path| path.file_name().map(|x| x.to_string_lossy().to_string()).unwrap_or("/".to_string()))
}
+ /// Get the parent directory node of the current node.
+ ///
+ /// If the node is the root of the tree, it will return a copy of itself. If this node was
+ /// created without path information, it will raise a [`NoPath`](SquashfsError::NoPath) error.
pub fn parent(&self) -> Result<Self> {
self.path.as_ref().map(|path| {
let ppath = path.parent().unwrap_or(&Path::new(""));
@@ -383,6 +533,10 @@ impl<'a> Node<'a> {
}).ok_or(SquashfsError::NoPath)?
}
+ /// Resolve symbolic links to their targets, raising an error if a target does not exist.
+ ///
+ /// This works the same way as [`resolve`](Self::resolve), except that an error is raised if
+ /// any link in the chain of symbolic links points at a path that does not exist.
pub fn resolve_exists(&self) -> Result<Self> {
let mut visited = HashSet::new();
let mut cur = Box::new(self.clone());
@@ -410,10 +564,20 @@ impl<'a> Node<'a> {
}
}
+ /// Resolve symbolic links to their targets.
+ ///
+ /// This follows the chain of symbolic links starting at the current node all the way to the
+ /// end, returning the final node, which is guaranteed not to be a symbolic link. If any link
+ /// in the chain points at a path that does not exist, it returns `Ok(None)`. If the current
+	/// node is not a symbolic link, this returns a copy of itself.
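+	///
+	/// A sketch of resolving a possible symlink before use (hypothetical path):
+	///
+	///     let node = archive.get("/maybe-a-link")?.unwrap();
+	///     match node.resolve()? {
+	///         Some(target) => println!("Target is a {}", target.data()?.name()),
+	///         None => println!("Broken symbolic link"),
+	///     }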
pub fn resolve(&self) -> Result<Option<Self>> {
enoent_ok(self.resolve_exists())
}
+ /// Return true if the current `Node` is a file.
+ ///
+ /// This does *not* resolve symbolic links, and will return `false` when called on nodes that
+ /// are symbolic links to files.
pub fn is_file(&self) -> Result<bool> {
match self.data()? {
Data::File(_) => Ok(true),
@@ -421,6 +585,11 @@ impl<'a> Node<'a> {
}
}
+ /// Fetch the [`File`] object from the current `Node`.
+ ///
+ /// This is essentially a shortcut for `if let Data::File(file) = self.data()`. If this node
+ /// is not a regular file, this will return an error. This does *not* resolve symbolic links;
+ /// the caller should call [`resolve`](Self::resolve) first if the node could be a link.
pub fn as_file(&self) -> Result<File> {
match self.data()? {
Data::File(f) => Ok(f),
@@ -428,11 +597,20 @@ impl<'a> Node<'a> {
}
}
+ /// Convert the `Node` into an [`OwnedFile`].
+ ///
+ /// This resolves symbolic links. If the current node is not a regular file or a link to one,
+ /// it will return an error.
+ ///
+	///     let archive = Archive::new("archive.sfs")?;
+	///     let mut buf = String::new();
+	///     archive.get("/file.txt")?.unwrap().into_owned_file()?.read_to_string(&mut buf)?;
pub fn into_owned_file(self) -> Result<OwnedFile<'a>> {
let resolved = self.resolve_exists()?;
Ok(OwnedFile { handle: OwningHandle::try_new(Box::new(resolved), |x| unsafe { (*x).as_file().map(|x| Box::new(x)) })? })
}
+ /// Return true if the current `Node` is a directory.
pub fn is_dir(&self) -> Result<bool> {
match self.data()? {
Data::Dir(_) => Ok(true),
@@ -440,6 +618,11 @@ impl<'a> Node<'a> {
}
}
+ /// Fetch the [`Dir`] object from the current `Node`.
+ ///
+ /// This is essentially a shortcut for `if let Data::Dir(dir) = self.data()`. If this node is
+ /// not a directory, it will return an error. This does *not* resolve symbolic links; the
+ /// caller should call [`resolve`](Self::resolve) first if the node could be a link.
pub fn as_dir(&self) -> Result<Dir> {
match self.data()? {
Data::Dir(d) => Ok(d),
@@ -447,25 +630,38 @@ impl<'a> Node<'a> {
}
}
+ /// Convert the `Node` into an [`OwnedDir`].
+ ///
+ /// This resolves symbolic links. If the current node is not a directory or a link to one, it
+ /// will return an error.
+ ///
+	///     let archive = Archive::new("archive.sfs")?;
+	///     for child in archive.get("/dir")?.unwrap().into_owned_dir()? {
+	///         println!("{}", child?.name().unwrap());
+	///     }
pub fn into_owned_dir(self) -> Result<OwnedDir<'a>> {
let resolved = self.resolve_exists()?;
Ok(OwnedDir { handle: OwningHandle::try_new(Box::new(resolved), |x| unsafe { (*x).as_dir().map(|x| Box::new(x)) })? })
}
+ /// Get the UID of the `Node`.
pub fn uid(&self) -> Result<u32> {
let idx = unsafe { (***self.inode).base.uid_idx };
self.container.id_lookup(idx)
}
+ /// Get the GID of the `Node`.
pub fn gid(&self) -> Result<u32> {
let idx = unsafe { (***self.inode).base.gid_idx };
self.container.id_lookup(idx)
}
+ /// Get the file mode of the `Node`.
pub fn mode(&self) -> u16 {
unsafe { (***self.inode).base.mode }
}
+ /// Get the modification time of the `Node` as a UNIX timestamp.
pub fn mtime(&self) -> u32 {
unsafe { (***self.inode).base.mod_time }
}
@@ -490,6 +686,7 @@ impl<'a> std::fmt::Debug for Node<'a> {
}
}
+/// An open SquashFS archive.
pub struct Archive {
path: PathBuf,
file: ManagedPointer<sqfs_file_t>,
@@ -499,6 +696,7 @@ pub struct Archive {
}
impl Archive {
+ /// Open an existing archive for reading.
pub fn new<T: AsRef<Path>>(path: T) -> Result<Self> {
let cpath = CString::new(os_to_string(path.as_ref().as_os_str())?)?;
let file = sfs_init_check_null(&|| unsafe {
@@ -516,7 +714,6 @@ impl Archive {
Ok(Self { path: path.as_ref().to_path_buf(), file: file, superblock: superblock, compressor_config: compressor_config, mmap: (os_file, map) })
}
-
fn compressor(&self) -> Result<ManagedPointer<sqfs_compressor_t>> {
Ok(sfs_init_ptr(&|x| unsafe {
sqfs_compressor_create(&self.compressor_config, x)
@@ -541,14 +738,17 @@ impl Archive {
}, "Couldn't get ID from ID table")?)
}
+	/// Retrieve the path that was used to open the archive.
pub fn path(&self) -> &Path {
&self.path
}
+ /// Get the number of inodes in the archive.
pub fn size(&self) -> u32 {
self.superblock.inode_count
}
+ /// Get the [`Node`] located at the given path, raising an error if it does not exist.
pub fn get_exists<T: AsRef<Path>>(&self, path: T) -> Result<Node> {
let compressor = self.compressor()?;
let dir_reader = sfs_init_check_null(&|| unsafe {
@@ -570,10 +770,20 @@ impl Archive {
}
}
+ /// Get the [`Node`] located at the given path in the archive.
+ ///
+ /// If the path is not present, `Ok(None)` will be returned.
pub fn get<T: AsRef<Path>>(&self, path: T) -> Result<Option<Node>> {
enoent_ok(self.get_exists(path))
}
+ /// Get a node from the archive by its inode number.
+ ///
+ /// Each inode in an archive has a unique ID. If the archive was created with the "exportable"
+ /// option (intended for exporting over NFS), it is efficient to look up inodes by their IDs.
+ /// If this archive is not exportable, [`SquashfsError::Unsupported`] will be raised. A `Node`
+ /// obtained in this way will lack path information, and as such operations like getting its
+ /// file name or parent will fail.
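+	///
+	/// A sketch of round-tripping a node through its inode number (assumes an exportable
+	/// archive and a hypothetical path):
+	///
+	///     let node = archive.get_exists("/file.txt")?;
+	///     let same = archive.get_id(node.id() as u64)?;
+	///     assert_eq!(node.id(), same.id());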
pub fn get_id(&self, id: u64) -> Result<Node> {
if self.superblock.flags & SQFS_SUPER_FLAGS_SQFS_FLAG_EXPORTABLE as u16 == 0 { Err(SquashfsError::Unsupported("inode indexing".to_string()))?; }
if id <= 0 || id > self.superblock.inode_count as u64 { Err(SquashfsError::Range(id, self.superblock.inode_count as u64))? }
diff --git a/src/write.rs b/src/write.rs
index fb15ed1..6c9c281 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -1,3 +1,26 @@
+//! Facilities for writing SquashFS archives.
+//!
+//! The most straightforward way to write a SquashFS file from a directory tree on-disk is to use a
+//! [`TreeProcessor`]. This provides the ability to make "last-minute" modifications to the files
+//! that are added, such as skipping certain files or modifying metadata.
+//!
+//! To create a totally "synthetic" SquashFS file that is not built from files in a filesystem,
+//! open a [`Writer`] and feed [`Source`]s to it.
+//!
+//! # Limitations
+//!
+//! This library does not yet handle hard links; files with multiple hard links will be archived as
+//! separate files with identical contents (which should be deduplicated and end up taking up
+//! little additional space).
+//!
+//! The SquashFS specification includes a field in directory inodes for the parent inode number,
+//! presumably to make `..` directory entries work. This is one factor that makes it impossible to
+//! build a SquashFS file without building out the entire directory tree to be archived in memory.
+//! I have tried as hard as possible to reduce the amount of data that must be stored for each
+//! node added, and this architecture makes it infeasible to store parent inodes in directory
+//! entries. I hope to fix this some day, and in the meantime it has not caused problems in the
+//! ways I have used the resultant files.
+
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::ffi::{CString, OsString};
@@ -9,33 +32,126 @@ use super::*;
use super::SquashfsError;
use walkdir::{DirEntry, WalkDir};
+/// Flags to fine-tune how an entry is added to the archive.
+///
+/// These values can be ORed together and passed in the [`flags`](Source::flags) field of a
+/// [`Source`] object.
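+///
+/// For example, a sketch of requesting that a file be stored raw and contiguous (which, per
+/// [`mmap`] in the `read` module, also makes it memory-mappable):
+///
+///     let flags = BlockFlags::DontCompress as u32 | BlockFlags::DontFragment as u32;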
#[repr(u32)]
pub enum BlockFlags {
+ /// Don't compress file data.
+ ///
+ /// By default, files are compressed, and the compressed version is stored in the archive if it
+ /// is smaller than the uncompressed version. Setting this flag will force the file to be
+ /// stored uncompressed.
DontCompress = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_COMPRESS,
+
+ /// Align the file data to device blocks.
+ ///
+ /// If set, padding will be added before and after this file's data blocks so that it is
+ /// aligned to the blocks of the underlying disk.
BlockAlign = super::SQFS_BLK_FLAGS_SQFS_BLK_ALIGN,
+
+ /// Store the tail of the file in a regular data block rather than a fragment block.
+ ///
+ /// The compressed content of a file to be written to an archive is split into equally-sized
+ /// blocks and stored as "data blocks". The final chunk is usually smaller than the rest, so
+ /// these final chunks are collected from multiple files are collected and stored together in
+	/// these final chunks from multiple files are collected and stored together in
+ /// contents to be stored together, fragmentation can be disabled using this flag.
DontFragment = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_FRAGMENT,
+
+	/// Don't deduplicate data blocks for this file.
+ ///
+ /// If two files contain an identical data block, the block will be stored only once and both
+ /// files' block indices will point to this single block. The user can force all blocks of a
+ /// file to be stored by setting this flag.
DontDeduplicate = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_DEDUPLICATE,
+
+ /// Don't elide sparse blocks.
+ ///
+ /// If a block of a file contains only zeros, it will not be stored at all and the file's block
+	/// index will mark that the block is all-zero. Setting this flag disables that behavior, so
+	/// that zero blocks are written out in full.
IgnoreSparse = super::SQFS_BLK_FLAGS_SQFS_BLK_IGNORE_SPARSE,
+
+ /// Don't compute block checksums for this file.
+ ///
+ /// Each data block is checksummed to verify data integrity unless this flag is set.
DontHash = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_HASH,
}
+/// Represents the data of a filesystem object that can be added to an archive.
+///
+/// When creating the archive, this object is read from a [`Source`] (which additionally describes
+/// the filesystem attributes of the node) and used to set the type and contents of the node.
pub enum SourceData {
+ /// Create a file with the provided contents.
+ ///
+ /// The contained object will be read and its contents placed in the file written to the
+ /// archive.
File(Box<dyn Read + Sync + Send>),
+
+	/// Create a directory with the given children.
+ ///
+ /// The creator must provide an iterator over [`OsString`] and `u32`, which respectively
+ /// represent the name and inode number of each child of this directory. This is one of the
+ /// hardest parts about writing archive contents -- all children of each directory must be
+ /// written before the directory itself, so that the inode numbers of the children are known.
+ /// [`TreeProcessor`] facilitates this by performing a post-order traversal of a filesystem,
+ /// ensuring that files are written in the correct order.
Dir(Box<dyn Iterator<Item=(OsString, u32)> + Sync + Send>),
- Symlink(OsString),
+
+ /// Create a symbolic link to the given path.
+ ///
+ /// It is not required for the target of the symlink to exist.
+ Symlink(PathBuf),
+
+ /// Create a block device file with the given major and minor device numbers.
BlockDev(u32, u32),
+
+ /// Create a character device file with the given major and minor device numbers.
CharDev(u32, u32),
+
+ /// Create a named pipe.
Fifo,
+
+ /// Create a socket.
Socket,
}
+/// A single node to be added to the SquashFS archive.
+///
+/// This contains a [`SourceData`] instance containing the actual data of the node, along with
+/// metadata such as permissions and extended attributes. The path to the node is not part of this
+/// object, because all information necessary to reconstruct the directory tree is contained in the
+/// directory iterators. However, for higher-level mechanisms that abstract away details such as
+/// inode numbers, it is helpful to associate a path with each `Source`; [`SourceFile`] is used for
+/// this purpose.
+///
+/// This object is designed to be constructed by the user by setting all fields to the appropriate
+/// values.
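+///
+/// A sketch of a hand-constructed `Source` for a small file (all values are illustrative):
+///
+///     let source = Source {
+///         data: SourceData::File(Box::new(std::io::Cursor::new(b"hello\n".to_vec()))),
+///         uid: 1000,
+///         gid: 1000,
+///         mode: 0o644,
+///         modified: 0,
+///         xattrs: HashMap::new(),
+///         flags: 0,
+///     };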
pub struct Source {
+ /// The type of the node and the data it contains.
pub data: SourceData,
+
+ /// The UID of the file.
pub uid: u32,
+
+ /// The GID of the file.
pub gid: u32,
+
+ /// The file mode.
pub mode: u16,
+
+ /// The modification time of the file as a Unix timestamp.
pub modified: u32,
+
+ /// Extended attributes on the node. Each one must start with a valid xattr namespace (such as
+	/// "user."), and the values can be arbitrary byte strings.
pub xattrs: HashMap<OsString, Vec<u8>>,
+
+ /// [`BlockFlags`] to set on the node to control how its contents are archived. Multiple flags
+ /// can be combined using `|`.
pub flags: u32,
}
@@ -63,6 +179,7 @@ fn copy_metadata(src: &ManagedPointer<sqfs_inode_generic_t>, dst: &mut ManagedPo
}
impl Source {
+ /// Construct a `Source` from a `SourceData`, using defaults for all metadata fields.
pub fn defaults(data: SourceData) -> Self {
Self { data: data, uid: 0, gid: 0, mode: 0x1ff, modified: 0, xattrs: HashMap::new(), flags: 0 }
}
@@ -71,7 +188,6 @@ impl Source {
((min & 0xfff00) << 20) | ((maj & 0xfff) << 8) | (min & 0xff)
}
- // TODO Handle hard links
unsafe fn to_inode(&self, link_count: u32) -> Result<ManagedPointer<sqfs_inode_generic_t>> {
unsafe fn create_inode(kind: SQFS_INODE_TYPE, extra: usize) -> ManagedPointer<sqfs_inode_generic_t> {
use std::alloc::{alloc, Layout};
@@ -89,7 +205,7 @@ impl Source {
ret
},
SourceData::Symlink(dest_os) => {
- let dest = os_to_string(&dest_os)?.into_bytes();
+ let dest = os_to_string(dest_os.as_os_str())?.into_bytes();
let mut ret = create_inode(SQFS_INODE_TYPE_SQFS_INODE_SLINK, dest.len());
let mut data = &mut (**ret).data.slink;
data.nlink = link_count;
@@ -133,11 +249,51 @@ struct IntermediateNode {
pos: u64,
}
+/// A [`Source`] bundled with the path where it should be located.
+///
+/// While the path of a `Source` is not strictly necessary to build the directory tree, it is a
+/// useful way for automatic archive builders like [`TreeProcessor`] to keep track of files as they
+/// are being added.
+///
+/// For purposes for which the metadata stored in [`Source`], like permissions and xattrs, is
+/// unnecessary, [`defaults`](Self::defaults) can be used to conveniently construct a `SourceFile`
+/// from a [`PathBuf`] and [`SourceData`].
pub struct SourceFile {
pub path: PathBuf,
pub content: Source,
}
+impl SourceFile {
+	/// Wrap a [`PathBuf`] and [`SourceData`] in a new `SourceFile`, using defaults for all
+	/// metadata fields.
+ ///
+ /// This sets UID and GID to 0 and permissions to 0o777, gives a null modification time and no
+ /// xattrs, and sets no flags.
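+	///
+	/// For example (path and contents are illustrative):
+	///
+	///     let file = SourceFile::defaults(PathBuf::from("hello.txt"),
+	///         SourceData::File(Box::new(std::io::Cursor::new(b"hello\n".to_vec()))));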
+ pub fn defaults(path: PathBuf, data: SourceData) -> Self {
+ Self { path: path, content: Source::defaults(data) }
+ }
+}
+
+/// A basic SquashFS writer.
+///
+/// This provides a simple interface for writing archives. The user calls [`open`](Self::open),
+/// [`add`](Self::add) to add each node, and [`finish`](Self::finish) to finish writing. This is
+/// intended for writing archives that are generated by code or otherwise not reflected by files in
+/// a file system -- if you want to archive a tree of files from disk, [`TreeProcessor`] handles
+/// directory tracking so that you don't have to do it yourself.
+///
+/// **Each node must be written before its parent**, and an error will be raised if this invariant
+/// is not maintained -- however, this is not detected until `finish` is called.
+///
+///     let mut writer = Writer::open("archive.sfs")?;
+///     let mut ids = HashMap::new();
+///     for i in 0..5 {
+///         let content = std::io::Cursor::new(format!("This is the content of file {}.txt.", i));
+///         let source = Source::defaults(SourceData::File(Box::new(content)));
+///         let id = writer.add(source)?;
+///         ids.insert(OsString::from(format!("{}.txt", i)), id);
+///     }
+///     writer.add(Source::defaults(SourceData::Dir(Box::new(ids.into_iter()))))?;
+///     writer.finish()?;
pub struct Writer {
outfile: ManagedPointer<sqfs_file_t>,
#[allow(dead_code)] compressor_config: sqfs_compressor_config_t, // Referenced by `compressor`
@@ -156,6 +312,9 @@ pub struct Writer {
}
impl Writer {
+ /// Open a new output file for writing.
+ ///
+ /// If the file exists, it will be overwritten.
pub fn open<T: AsRef<Path>>(path: T) -> Result<Self> {
let cpath = CString::new(os_to_string(path.as_ref().as_os_str())?)?;
let block_size = SQFS_DEFAULT_BLOCK_SIZE as u64;
@@ -247,6 +406,16 @@ impl Writer {
unsafe { (**self.outfile).get_size.expect("Superblock doesn't provide get_size")(*self.outfile) }
}
+ /// Add the provided `Source` to the archive.
+ ///
+ /// This writes file data and xattrs to the archive directly, while storing directory tree
+ /// information to write when `finish` is called.
+ ///
+ /// The returned value is the inode number of the added `Source`. If the file is to be added
+ /// to a directory (that is, almost always), this number needs to be stored so that it can be
+ /// provided when the directory is added. In the current implementation, inode numbers start
+ /// at 1 for the first file and count steadily upward, but this behavior may change without
+ /// warning.
pub fn add(&mut self, mut source: Source) -> Result<u32> {
let finished = self.finished.read().expect("Poisoned lock");
if *finished { Err(SquashfsError::Finished)?; }
@@ -296,6 +465,9 @@ impl Writer {
Ok(nodes.len() as u32)
}
+ /// Finish writing the archive and flush all contents to disk.
+ ///
+ /// It is an error to call `add` after this has been run.
pub fn finish(&mut self) -> Result<()> {
*self.finished.write().expect("Poisoned lock") = true;
let nodes = self.nodes.lock().expect("Poisoned lock");
@@ -351,17 +523,45 @@ impl Writer {
unsafe impl Sync for Writer { }
unsafe impl Send for Writer { }
+/// Tool to help create an archive from a directory in the filesystem.
+///
+/// This wraps a [`Writer`] and takes care of tracking the directory hierarchy as files are added,
+/// populating the iterators of [`SourceData::Dir`]s as necessary.
+///
+/// To simply create a SquashFS file from a chosen directory, call [`process`](Self::process):
+///
+///     TreeProcessor::new("archive.sfs")?.process("/home/me/test")?;
+///
+/// For more control over the addition process -- for example, to exclude certain files, add
+/// extended attributes, ignore errors, or print files as they are added -- use
+/// [`iter`](Self::iter) to get an iterator over the directory tree, and then call
+/// [`add`](Self::add) on each `SourceFile` yielded after applying any desired transformations.
+/// After the iterator finishes, remember to call [`finish`](Self::finish).
+///
+///     let processor = TreeProcessor::new("archive.sfs")?;
+///     for entry in processor.iter("/home/me/test") {
+///         let mut entry = entry?;
+///         entry.content.mode = 0x1ff; // Set all nodes to be read/writable by anyone
+///         let path = entry.path.clone();
+///         match processor.add(entry) {
+///             Ok(id) => println!("{}: {}", id, path.display()),
+///             Err(_) => println!("Failed adding {}", path.display()),
+///         }
+///     }
+///     processor.finish()?;
pub struct TreeProcessor {
- root: PathBuf,
writer: Mutex<Writer>,
childmap: Mutex<HashMap<PathBuf, BTreeMap<OsString, u32>>>,
}
impl TreeProcessor {
- pub fn new<P: AsRef<Path>>(writer: Writer, root: P) -> Result<Self> {
- Ok(Self { root: root.as_ref().to_path_buf(), writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) })
+ /// Create a new `TreeProcessor` for an output file.
+ pub fn new<P: AsRef<Path>>(outfile: P) -> Result<Self> {
+ let writer = Writer::open(outfile)?;
+ Ok(Self { writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) })
}
+ /// Add a new file to the archive.
+ ///
+ /// It is not recommended to call this on `SourceFile`s that were not yielded by `iter`.
pub fn add(&self, mut source: SourceFile) -> Result<u32> {
let mut childmap = self.childmap.lock().expect("Poisoned lock");
if let SourceData::Dir(children) = &mut source.content.data {
@@ -377,6 +577,7 @@ impl TreeProcessor {
Ok(id)
}
+ /// Finish writing the archive.
pub fn finish(&self) -> Result<()> {
self.writer.lock().expect("Poisoned lock").finish()
}
@@ -391,7 +592,7 @@ impl TreeProcessor {
SourceData::File(Box::new(std::fs::File::open(entry.path())?))
}
else if metadata.file_type().is_symlink() {
- SourceData::Symlink(std::fs::read_link(entry.path())?.into_os_string())
+ SourceData::Symlink(std::fs::read_link(entry.path())?)
}
else {
Err(SquashfsError::WriteType(metadata.file_type()))?;
@@ -411,12 +612,28 @@ impl TreeProcessor {
Ok(source)
}
- pub fn iter<'a>(&'a self) -> TreeIterator<'a> {
- let tree = WalkDir::new(&self.root).follow_links(false).contents_first(true);
+	/// Create an iterator over a directory tree, yielding its entries in a form suitable to pass
+	/// to `add`.
+ pub fn iter<'a, P: AsRef<Path>>(&'a self, root: P) -> TreeIterator<'a> {
+ let tree = WalkDir::new(root).follow_links(false).contents_first(true);
TreeIterator { processor: self, tree: tree.into_iter() }
}
+
+ /// Add an entire directory tree to the archive, then finish it.
+ ///
+ /// This is the most basic, bare-bones way to create a full archive from an existing directory
+ /// tree. This offers no way to customize the archive or handle errors gracefully.
+ pub fn process<P: AsRef<Path>>(self, root: P) -> Result<()> {
+ for entry in self.iter(root) { self.add(entry?)?; }
+ self.finish()?;
+ Ok(())
+ }
}
+/// An iterator yielding the nodes in a directory tree in a way suitable for archiving.
+///
+/// This is created by a [`TreeProcessor`] and the items yielded are intended to be
+/// [`add`](TreeProcessor::add)ed to it.
pub struct TreeIterator<'a> {
processor: &'a TreeProcessor,
tree: walkdir::IntoIter,