Diffstat (limited to 'src/write.rs')
-rw-r--r-- | src/write.rs | 235 |
1 file changed, 226 insertions, 9 deletions
diff --git a/src/write.rs b/src/write.rs
index fb15ed1..6c9c281 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -1,3 +1,26 @@
+//! Facilities for writing SquashFS archives.
+//!
+//! The most straightforward way to write a SquashFS file from a directory tree on-disk is to use a
+//! [`TreeProcessor`]. This provides the ability to make "last-minute" modifications to the files
+//! that are added, such as skipping certain files or modifying metadata.
+//!
+//! To create a totally "synthetic" SquashFS file that is not built from files in a filesystem,
+//! open a [`Writer`] and feed [`Source`]s to it.
+//!
+//! # Limitations
+//!
+//! This library does not yet handle hard links; files with multiple hard links will be archived as
+//! separate files with identical contents (which should be deduplicated, and so end up taking
+//! little additional space).
+//!
+//! The SquashFS specification includes a field in directory inodes for the parent inode number,
+//! presumably to make `..` directory entries work. This is one factor that makes it impossible to
+//! build a SquashFS file without building out the entire directory tree to be archived in memory.
+//! I have tried as hard as possible to reduce the amount of data that must be stored for each
+//! node added, and this architecture makes it infeasible to store parent inodes in directory
+//! entries. I hope to fix this some day; in the meantime, it has not caused problems in the ways I
+//! have used the resultant files.
+
 use std::cell::RefCell;
 use std::collections::{BTreeMap, HashMap};
 use std::ffi::{CString, OsString};
@@ -9,33 +32,126 @@ use super::*;
 use super::SquashfsError;
 use walkdir::{DirEntry, WalkDir};
 
+/// Flags to fine-tune how an entry is added to the archive.
+///
+/// These values can be ORed together and passed in the [`flags`](Source::flags) field of a
+/// [`Source`] object.
 #[repr(u32)]
 pub enum BlockFlags {
+    /// Don't compress file data.
+    ///
+    /// By default, files are compressed, and the compressed version is stored in the archive if it
+    /// is smaller than the uncompressed version. Setting this flag will force the file to be
+    /// stored uncompressed.
     DontCompress = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_COMPRESS,
+
+    /// Align the file data to device blocks.
+    ///
+    /// If set, padding will be added before and after this file's data blocks so that it is
+    /// aligned to the blocks of the underlying disk.
     BlockAlign = super::SQFS_BLK_FLAGS_SQFS_BLK_ALIGN,
+
+    /// Store the tail of the file in a regular data block rather than a fragment block.
+    ///
+    /// The compressed content of a file to be written to an archive is split into equally-sized
+    /// blocks and stored as "data blocks". The final chunk is usually smaller than the rest, so
+    /// the final chunks from multiple files are collected and stored together in separate
+    /// "fragment blocks" as an optimization. If there is a reason for the entire file's contents
+    /// to be stored together, fragmentation can be disabled using this flag.
     DontFragment = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_FRAGMENT,
+
+    /// Don't deduplicate data blocks for this file.
+    ///
+    /// If two files contain an identical data block, the block will be stored only once and both
+    /// files' block indices will point to this single block. The user can force all blocks of a
+    /// file to be stored by setting this flag.
     DontDeduplicate = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_DEDUPLICATE,
+
+    /// Don't elide sparse blocks.
+    ///
+    /// If a block of a file contains only zeros, it will not be stored at all, and the file's
+    /// block index will mark that the block is all-zero. Setting this flag disables this behavior
+    /// so that the zero block is written out.
     IgnoreSparse = super::SQFS_BLK_FLAGS_SQFS_BLK_IGNORE_SPARSE,
+
+    /// Don't compute block checksums for this file.
+    ///
+    /// Each data block is checksummed to verify data integrity unless this flag is set.
     DontHash = super::SQFS_BLK_FLAGS_SQFS_BLK_DONT_HASH,
 }
 
+/// Represents the data of a filesystem object that can be added to an archive.
+///
+/// When creating the archive, this object is read from a [`Source`] (which additionally describes
+/// the filesystem attributes of the node) and used to set the type and contents of the node.
 pub enum SourceData {
+    /// Create a file with the provided contents.
+    ///
+    /// The contained object will be read and its contents placed in the file written to the
+    /// archive.
     File(Box<dyn Read + Sync + Send>),
+
+    /// Create a directory with the given children.
+    ///
+    /// The creator must provide an iterator over `(OsString, u32)` pairs, which respectively
+    /// represent the name and inode number of each child of this directory. This is one of the
+    /// hardest parts about writing archive contents -- all children of each directory must be
+    /// written before the directory itself, so that the inode numbers of the children are known.
+    /// [`TreeProcessor`] facilitates this by performing a post-order traversal of a filesystem,
+    /// ensuring that files are written in the correct order.
     Dir(Box<dyn Iterator<Item=(OsString, u32)> + Sync + Send>),
-    Symlink(OsString),
+
+    /// Create a symbolic link to the given path.
+    ///
+    /// The target of the symlink is not required to exist.
+    Symlink(PathBuf),
+
+    /// Create a block device file with the given major and minor device numbers.
     BlockDev(u32, u32),
+
+    /// Create a character device file with the given major and minor device numbers.
     CharDev(u32, u32),
+
+    /// Create a named pipe.
     Fifo,
+
+    /// Create a socket.
     Socket,
 }
 
+/// A single node to be added to the SquashFS archive.
+///
+/// This contains a [`SourceData`] instance containing the actual data of the node, along with
+/// metadata such as permissions and extended attributes. The path to the node is not part of this
+/// object, because all information necessary to reconstruct the directory tree is contained in the
+/// directory iterators. However, for higher-level mechanisms that abstract away details such as
+/// inode numbers, it is helpful to associate a path with each `Source`; [`SourceFile`] is used for
+/// this purpose.
+///
+/// This object is designed to be constructed by the user by setting all fields to the appropriate
+/// values.
 pub struct Source {
+    /// The type of the node and the data it contains.
     pub data: SourceData,
+
+    /// The UID of the file.
     pub uid: u32,
+
+    /// The GID of the file.
     pub gid: u32,
+
+    /// The file mode.
     pub mode: u16,
+
+    /// The modification time of the file as a Unix timestamp.
     pub modified: u32,
+
+    /// Extended attributes on the node. Each name must start with a valid xattr namespace (such
+    /// as "user."), and the values can be arbitrary byte strings.
     pub xattrs: HashMap<OsString, Vec<u8>>,
+
+    /// [`BlockFlags`] to set on the node to control how its contents are archived. Multiple flags
+    /// can be combined using `|`.
     pub flags: u32,
 }
@@ -63,6 +179,7 @@ fn copy_metadata(src: &ManagedPointer<sqfs_inode_generic_t>, dst: &mut ManagedPo
 }
 
 impl Source {
+    /// Construct a `Source` from a `SourceData`, using defaults for all metadata fields.
     pub fn defaults(data: SourceData) -> Self {
         Self { data: data, uid: 0, gid: 0, mode: 0x1ff, modified: 0, xattrs: HashMap::new(), flags: 0 }
     }
@@ -71,7 +188,6 @@ impl Source {
         ((min & 0xfff00) << 20) | ((maj & 0xfff) << 8) | (min & 0xff)
     }
 
-    // TODO Handle hard links
     unsafe fn to_inode(&self, link_count: u32) -> Result<ManagedPointer<sqfs_inode_generic_t>> {
         unsafe fn create_inode(kind: SQFS_INODE_TYPE, extra: usize) -> ManagedPointer<sqfs_inode_generic_t> {
             use std::alloc::{alloc, Layout};
@@ -89,7 +205,7 @@ impl Source {
                 ret
             },
             SourceData::Symlink(dest_os) => {
-                let dest = os_to_string(&dest_os)?.into_bytes();
+                let dest = os_to_string(dest_os.as_os_str())?.into_bytes();
                 let mut ret = create_inode(SQFS_INODE_TYPE_SQFS_INODE_SLINK, dest.len());
                 let mut data = &mut (**ret).data.slink;
                 data.nlink = link_count;
@@ -133,11 +249,51 @@ struct IntermediateNode {
     pos: u64,
 }
 
+/// A [`Source`] bundled with the path where it should be located.
+///
+/// While the path of a `Source` is not strictly necessary to build the directory tree, it is a
+/// useful way for automatic archive builders like [`TreeProcessor`] to keep track of files as they
+/// are being added.
+///
+/// For cases in which the metadata stored in [`Source`], like permissions and xattrs, is
+/// unnecessary, [`defaults`](Self::defaults) can be used to conveniently construct a `SourceFile`
+/// from a [`PathBuf`] and [`SourceData`].
 pub struct SourceFile {
     pub path: PathBuf,
     pub content: Source,
 }
 
+impl SourceFile {
+    /// Wrap a `SourceData` in a new `SourceFile` at the given path, using defaults for all
+    /// metadata fields.
+    ///
+    /// This sets UID and GID to 0 and permissions to 0o777, gives a null modification time and no
+    /// xattrs, and sets no flags.
+    pub fn defaults(path: PathBuf, data: SourceData) -> Self {
+        Self { path: path, content: Source::defaults(data) }
+    }
+}
+
+/// A basic SquashFS writer.
+///
+/// This provides a simple interface for writing archives. The user calls [`open`](Self::open),
+/// [`add`](Self::add) to add each node, and [`finish`](Self::finish) to finish writing. This is
+/// intended for writing archives that are generated by code or otherwise not reflected by files in
+/// a file system -- if you want to archive a tree of files from disk, [`TreeProcessor`] handles
+/// directory tracking so that you don't have to do it yourself.
+///
+/// **Each node must be written before its parent**, and an error will be raised if this invariant
+/// is not maintained -- however, this is not detected until `finish` is called.
+///
+///     let mut writer = Writer::open("archive.sfs")?;
+///     let mut ids = HashMap::new();
+///     for i in 0..5 {
+///         let content = std::io::Cursor::new(format!("This is the content of file {}.txt.", i).into_bytes());
+///         let source = Source::defaults(SourceData::File(Box::new(content)));
+///         let id = writer.add(source)?;
+///         ids.insert(OsString::from(format!("{}.txt", i)), id);
+///     }
+///     writer.add(Source::defaults(SourceData::Dir(Box::new(ids.into_iter()))))?;
+///     writer.finish()?;
 pub struct Writer {
     outfile: ManagedPointer<sqfs_file_t>,
     #[allow(dead_code)] compressor_config: sqfs_compressor_config_t, // Referenced by `compressor`
@@ -156,6 +312,9 @@ pub struct Writer {
 }
 
 impl Writer {
+    /// Open a new output file for writing.
+    ///
+    /// If the file exists, it will be overwritten.
     pub fn open<T: AsRef<Path>>(path: T) -> Result<Self> {
         let cpath = CString::new(os_to_string(path.as_ref().as_os_str())?)?;
         let block_size = SQFS_DEFAULT_BLOCK_SIZE as u64;
@@ -247,6 +406,16 @@ impl Writer {
         unsafe { (**self.outfile).get_size.expect("Superblock doesn't provide get_size")(*self.outfile) }
     }
 
+    /// Add the provided `Source` to the archive.
+    ///
+    /// This writes file data and xattrs to the archive directly, while storing directory tree
+    /// information to write when `finish` is called.
+    ///
+    /// The returned value is the inode number of the added `Source`. If the file is to be added
+    /// to a directory (as is almost always the case), this number needs to be stored so that it
+    /// can be provided when the directory is added. In the current implementation, inode numbers
+    /// start at 1 for the first file and count steadily upward, but this behavior may change
+    /// without warning.
     pub fn add(&mut self, mut source: Source) -> Result<u32> {
         let finished = self.finished.read().expect("Poisoned lock");
         if *finished { Err(SquashfsError::Finished)?; }
@@ -296,6 +465,9 @@ impl Writer {
         Ok(nodes.len() as u32)
     }
 
+    /// Finish writing the archive and flush all contents to disk.
+    ///
+    /// It is an error to call `add` after this has been run.
     pub fn finish(&mut self) -> Result<()> {
         *self.finished.write().expect("Poisoned lock") = true;
         let nodes = self.nodes.lock().expect("Poisoned lock");
@@ -351,17 +523,45 @@ impl Writer {
 unsafe impl Sync for Writer { }
 unsafe impl Send for Writer { }
 
+/// Tool to help create an archive from a directory in the filesystem.
+///
+/// This wraps a [`Writer`] and takes care of tracking the directory hierarchy as files are added,
+/// populating the iterators of [`SourceData::Dir`]s as necessary.
+///
+/// To simply create a SquashFS file from a chosen directory, call [`process`](Self::process):
+///
+///     TreeProcessor::new("archive.sfs")?.process("/home/me/test")?;
+///
+/// For more control over the addition process -- for example, to exclude certain files, add
+/// extended attributes, ignore errors, or print files as they are added -- use
+/// [`iter`](Self::iter) to get an iterator over the directory tree, and then call
+/// [`add`](Self::add) on each `SourceFile` yielded, after applying any desired transformations.
+/// After the iterator finishes, remember to call [`finish`](Self::finish).
+///
+///     let processor = TreeProcessor::new("archive.sfs")?;
+///     for entry in processor.iter("/home/me/test") {
+///         let mut entry = entry?;
+///         entry.content.mode = 0x1ff; // Make all nodes readable/writable/executable by anyone
+///         let path = entry.path.clone();
+///         match processor.add(entry) {
+///             Ok(id) => println!("{}: {}", id, path.display()),
+///             Err(_) => println!("Failed adding {}", path.display()),
+///         }
+///     }
+///     processor.finish()?;
 pub struct TreeProcessor {
-    root: PathBuf,
     writer: Mutex<Writer>,
     childmap: Mutex<HashMap<PathBuf, BTreeMap<OsString, u32>>>,
 }
 
 impl TreeProcessor {
-    pub fn new<P: AsRef<Path>>(writer: Writer, root: P) -> Result<Self> {
-        Ok(Self { root: root.as_ref().to_path_buf(), writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) })
+    /// Create a new `TreeProcessor` for an output file.
+    pub fn new<P: AsRef<Path>>(outfile: P) -> Result<Self> {
+        let writer = Writer::open(outfile)?;
+        Ok(Self { writer: Mutex::new(writer), childmap: Mutex::new(HashMap::new()) })
     }
 
+    /// Add a new file to the archive.
+    ///
+    /// It is not recommended to call this on `SourceFile`s that were not yielded by `iter`.
     pub fn add(&self, mut source: SourceFile) -> Result<u32> {
         let mut childmap = self.childmap.lock().expect("Poisoned lock");
         if let SourceData::Dir(children) = &mut source.content.data {
@@ -377,6 +577,7 @@ impl TreeProcessor {
         Ok(id)
     }
 
+    /// Finish writing the archive.
     pub fn finish(&self) -> Result<()> {
         self.writer.lock().expect("Poisoned lock").finish()
     }
@@ -391,7 +592,7 @@ impl TreeProcessor {
             SourceData::File(Box::new(std::fs::File::open(entry.path())?))
         }
         else if metadata.file_type().is_symlink() {
-            SourceData::Symlink(std::fs::read_link(entry.path())?.into_os_string())
+            SourceData::Symlink(std::fs::read_link(entry.path())?)
        }
         else {
             Err(SquashfsError::WriteType(metadata.file_type()))?;
@@ -411,12 +612,28 @@ impl TreeProcessor {
         Ok(source)
     }
 
-    pub fn iter<'a>(&'a self) -> TreeIterator<'a> {
-        let tree = WalkDir::new(&self.root).follow_links(false).contents_first(true);
+    /// Create an iterator over a directory tree, yielding its nodes in a form suitable to pass
+    /// to `add`.
+    pub fn iter<'a, P: AsRef<Path>>(&'a self, root: P) -> TreeIterator<'a> {
+        let tree = WalkDir::new(root).follow_links(false).contents_first(true);
         TreeIterator { processor: self, tree: tree.into_iter() }
     }
+
+    /// Add an entire directory tree to the archive, then finish it.
+    ///
+    /// This is the most basic, bare-bones way to create a full archive from an existing directory
+    /// tree. It offers no way to customize the archive or handle errors gracefully.
+    pub fn process<P: AsRef<Path>>(self, root: P) -> Result<()> {
+        for entry in self.iter(root) { self.add(entry?)?; }
+        self.finish()?;
+        Ok(())
+    }
 }
 
+/// An iterator yielding the nodes in a directory tree in a way suitable for archiving.
+///
+/// This is created by a [`TreeProcessor`], and the items it yields are intended to be
+/// [`add`](TreeProcessor::add)ed to it.
 pub struct TreeIterator<'a> {
     processor: &'a TreeProcessor,
     tree: walkdir::IntoIter,
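To tie the pieces above together, here is a minimal sketch of writing a fully synthetic archive that exercises the new `flags` and `xattrs` fields alongside the children-before-parent rule. The output filename, file contents, and xattr name are illustrative only; `Result` is assumed to be this crate's error alias as used in the signatures above.

    use std::ffi::OsString;
    use std::io::Cursor;

    fn write_flags_example() -> Result<()> {
        let mut writer = Writer::open("flags-example.sfs")?;

        // Store this file uncompressed and unfragmented, with an xattr attached.
        // BlockFlags carries no BitOr impl, so the variants are cast to u32 before ORing.
        let mut file = Source::defaults(SourceData::File(Box::new(Cursor::new(b"raw contents".to_vec()))));
        file.mode = 0o644;
        file.flags = BlockFlags::DontCompress as u32 | BlockFlags::DontFragment as u32;
        file.xattrs.insert(OsString::from("user.origin"), b"generated".to_vec());
        let file_id = writer.add(file)?;

        // Children must be added before their parent, so the root directory comes last,
        // built from the inode numbers returned by the earlier add() calls.
        let children = vec![(OsString::from("data.bin"), file_id)];
        writer.add(Source::defaults(SourceData::Dir(Box::new(children.into_iter()))))?;
        writer.finish()?;
        Ok(())
    }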