diff options
author | Matthew Schauer <matthew.schauer@e10x.net> | 2021-05-19 14:58:43 -0700 |
---|---|---|
committer | Matthew Schauer <matthew.schauer@e10x.net> | 2021-05-19 14:58:43 -0700 |
commit | b7684c5a4c0f34003c7ee96f03fab4fa4c11aeb5 (patch) | |
tree | d96fd135041b0ec8b0337c7a6ff95bc32f40270d | |
parent | 6364ae1642dfa4e607daf9567d1e02a873848ed7 (diff) |
Protection against adding files to TreeProcessor out of order
-rw-r--r-- | src/lib.rs | 11 | ||||
-rw-r--r-- | src/read.rs | 5 | ||||
-rw-r--r-- | src/write.rs | 61 |
3 files changed, 56 insertions, 21 deletions
@@ -61,9 +61,9 @@ type BoxedError = Box<dyn std::error::Error + std::marker::Send + std::marker::S #[repr(i32)] pub enum LibError { #[error("Failed to allocate memory")] Alloc = SQFS_ERROR_SQFS_ERROR_ALLOC, - #[error("Generic I/O failure occurred")] Io = SQFS_ERROR_SQFS_ERROR_IO, + #[error("Generic I/O failure")] Io = SQFS_ERROR_SQFS_ERROR_IO, #[error("Compressor failed to extract data")] Compressor = SQFS_ERROR_SQFS_ERROR_COMPRESSOR, - #[error("Internal error occurred")] Internal = SQFS_ERROR_SQFS_ERROR_INTERNAL, + #[error("Internal error")] Internal = SQFS_ERROR_SQFS_ERROR_INTERNAL, #[error("Archive file appears to be corrupted")] Corrupted = SQFS_ERROR_SQFS_ERROR_CORRUPTED, #[error("Unsupported feature used")] Unsupported = SQFS_ERROR_SQFS_ERROR_UNSUPPORTED, #[error("Archive would overflow memory")] Overflow = SQFS_ERROR_SQFS_ERROR_OVERFLOW, @@ -89,7 +89,7 @@ pub enum SquashfsError { #[error("Encoded string is not valid UTF-8")] Utf8(#[from] std::string::FromUtf8Error), #[error("OS string is not valid UTF-8")] OsUtf8(OsString), #[error("{0}: {1}")] LibraryError(String, LibError), - #[error("{0}: Unknown error {1} in Squashfs library")] UnknownLibraryError(String, i32), + #[error("{0}: Unknown error {1} in SquashFS library")] UnknownLibraryError(String, i32), #[error("{0}: Squashfs library did not return expected value")] LibraryReturnError(String), #[error("{0}")] LibraryNullError(String), #[error("Symbolic link chain exceeds {0} elements")] LinkChain(i32), // Can I use a const in the formatting string? @@ -104,11 +104,12 @@ pub enum SquashfsError { #[error("Memory mapping failed: {0}")] Mmap(std::io::Error), #[error("Couldn't get the current system time: {0}")] Time(#[from] std::time::SystemTimeError), #[error("Refusing to create empty archive")] Empty, - #[error("Tried to write directory {0} before child {1}")] WriteOrder(u32, u32), + #[error("Tried to write parent directory before child node {0}")] WriteOrder(u32), #[error("Tried to write unknown or unsupported file type")] WriteType(std::fs::FileType), #[error("Callback returned an error")] WrappedError(BoxedError), #[error("Failed to retrieve xattrs for {0}: {1}")] Xattr(PathBuf, std::io::Error), #[error("Tried to add files to a writer that was already finished")] Finished, + #[error("Internal error: {0}")] Internal(String), } /// Result type returned by SquashFS library operations. @@ -127,7 +128,7 @@ fn sfs_check(code: i32, desc: &str) -> Result<i32> { fn sfs_destroy<T>(x: *mut T) { unsafe { let obj = x as *mut sqfs_object_t; - ((*obj).destroy.expect("Squashfs object did not provide a destroy callback"))(obj); + ((*obj).destroy.expect("SquashFS object did not provide a destroy callback"))(obj); } } diff --git a/src/read.rs b/src/read.rs index 1c6ad41..9a3a6c1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -477,7 +477,7 @@ impl<'a> std::ops::DerefMut for OwnedDir<'a> { /// This corresponds to the inode and directory entry structures of the underlying library. /// Because SquashFS inodes do not retain pointers back to their directory entries, inodes by /// default have no information about their positions in the directory tree. To work around this, -/// `Node` structs store their path and propagate it through calls like [`child`](Dir::child) and +/// a `Node` struct stores its path and propagate it through calls like [`child`](Dir::child) and /// [`parent`](Self::parent). If the `Node` was originally constructed in a way that does not /// provide path information, such as retrieving a node by inode number using [`Archive::get_id`], /// then the methods that require knowledge of the node's location in the tree, such as @@ -781,7 +781,7 @@ impl Archive { let mut locked_readers = self.data_readers.lock().expect(LOCK_ERR); let ret = match locked_readers.pop() { Some(reader) => reader, - None => { println!("Made data reader"); DataReader::new(&self)? }, + None => { DataReader::new(&self)? }, }; Ok(Leased::new(&self.data_readers, ret)) } @@ -860,6 +860,7 @@ impl Archive { sfs_check(sqfs_meta_reader_read(*export_reader, &mut noderef as *mut u64 as *mut libc::c_void, 8), "Couldn't read inode reference")?; } let (block, offset) = unpack_meta_ref(noderef); + println!("Node {} at block {}, offset {}", id, block, offset); let inode = sfs_init_ptr(&|x| unsafe { sqfs_meta_reader_read_inode(*export_reader, &self.superblock, block, offset, x) }, "Couldn't read inode", libc_free)?; diff --git a/src/write.rs b/src/write.rs index 6c9c281..49e9f72 100644 --- a/src/write.rs +++ b/src/write.rs @@ -460,9 +460,10 @@ impl Writer { _ => None, }; let mut nodes = self.nodes.lock().expect("Poisoned lock"); - unsafe { (***inode).base.inode_number = nodes.len() as u32 + 1; } + let nodenum = nodes.len() as u32 + 1; + unsafe { (***inode).base.inode_number = nodenum; } nodes.push(RefCell::new(IntermediateNode { inode: inode, dir_children: dir_children, pos: 0 })); - Ok(nodes.len() as u32) + Ok(nodenum) } /// Finish writing the archive and flush all contents to disk. @@ -481,7 +482,7 @@ impl Writer { sfs_check(sqfs_dir_writer_begin(*self.dir_writer, 0), "Couldn't start writing directory")?; // For each child, need: name, ID, reference, mode for (name, child_id) in children { // On disk children need to be sorted -- I think the library takes care of this - if child_id >= id { Err(SquashfsError::WriteOrder(id, child_id))?; } + if child_id >= id { Err(SquashfsError::WriteOrder(child_id))?; } let child_node = &nodes[child_id as usize - 1].borrow(); let child = child_node.inode.as_ref(); let child_ref = child_node.pos; @@ -523,6 +524,34 @@ impl Writer { unsafe impl Sync for Writer { } unsafe impl Send for Writer { } +enum ChildMapEntry { + Accumulating(BTreeMap<OsString, u32>), + Done, +} + +impl ChildMapEntry { + fn new() -> Self { + Self::Accumulating(BTreeMap::new()) + } + + fn add(&mut self, name: OsString, id: u32) -> Result<()> { + match self { + Self::Done => Err(SquashfsError::WriteOrder(id))?, + Self::Accumulating(children) => { + children.insert(name, id); + Ok(()) + }, + } + } + + fn finish(&mut self) -> Result<BTreeMap<OsString, u32>> { + match std::mem::replace(self, Self::Done) { + Self::Done => Err(SquashfsError::Internal("Tried to finish directory in tree processor multiple times".to_string()))?, + Self::Accumulating(children) => Ok(children), + } + } +} + /// Tool to help create an archive from a directory in the filesystem. /// /// This wraps a [`Writer`] and takes care of tracking the directory hierarchy as files are added, @@ -530,7 +559,7 @@ unsafe impl Send for Writer { } /// /// To simply create a SquashFS file from a chosen directory, call [`process`](Self::process): /// -/// TreeProcessor::new("archive.sfs")?.process("/home/me/test")? +/// TreeProcessor::new("archive.sfs")?.process("/home/me/test")?; /// /// For more control over the addition process -- for example, to exclude certain files, add /// extended attributes, ignore errors, or print files as they are added -- use @@ -547,9 +576,14 @@ unsafe impl Send for Writer { } /// } /// } /// processor.finish()?; +/// +/// It is safe to process the tree using multiple threads, but it is *the caller's responsibility* +/// to ensure that any out-of-order execution does not cause child nodes to be `add`ed after their +/// parent directories. If this happens, [`WriteOrder`](SquashfsError::WriteOrder) will be +/// raised and the node will not be added. pub struct TreeProcessor { writer: Mutex<Writer>, - childmap: Mutex<HashMap<PathBuf, BTreeMap<OsString, u32>>>, + childmap: Mutex<HashMap<PathBuf, ChildMapEntry>>, } impl TreeProcessor { @@ -564,15 +598,14 @@ impl TreeProcessor { /// It is not recommended to call this on `SourceFile`s that were not yielded by `iter`. pub fn add(&self, mut source: SourceFile) -> Result<u32> { let mut childmap = self.childmap.lock().expect("Poisoned lock"); - if let SourceData::Dir(children) = &mut source.content.data { - // Pull in any last-minute additions made by the user - let mut new_children = childmap.remove(&source.path).unwrap_or(BTreeMap::new()); - new_children.extend(children); - source.content.data = SourceData::Dir(Box::new(new_children.into_iter())); + if let SourceData::Dir(old_children) = &mut source.content.data { + let mut children = childmap.entry(source.path.clone()).or_insert(ChildMapEntry::new()).finish()?; + children.extend(old_children); + source.content.data = SourceData::Dir(Box::new(children.into_iter())); } let id = self.writer.lock().expect("Poisoned lock").add(source.content)?; if let Some(parent) = source.path.parent() { - childmap.entry(parent.to_path_buf()).or_insert(BTreeMap::new()).insert(source.path.file_name().expect("Path from walkdir has no file name").to_os_string(), id); + childmap.entry(parent.to_path_buf()).or_insert(ChildMapEntry::new()).add(source.path.file_name().expect("Path from walkdir has no file name").to_os_string(), id)?; } Ok(id) } @@ -586,7 +619,7 @@ impl TreeProcessor { let metadata = entry.metadata().unwrap(); let mtime = metadata.modified()?.duration_since(SystemTime::UNIX_EPOCH)?.as_secs() as u32; let data = if metadata.file_type().is_dir() { - SourceData::Dir(Box::new(self.childmap.lock().expect("Poisoned lock").remove(&entry.path().to_path_buf()).unwrap_or(BTreeMap::new()).into_iter())) + SourceData::Dir(Box::new(BTreeMap::new().into_iter())) } else if metadata.file_type().is_file() { SourceData::File(Box::new(std::fs::File::open(entry.path())?)) @@ -648,12 +681,12 @@ impl<'a> std::iter::Iterator for TreeIterator<'a> { Some(Ok(entry)) => { let path = entry.path().to_path_buf(); Some(self.processor.make_source(entry).map(|source| SourceFile { path: path, content: source })) - } + }, Some(Err(e)) => { let path = e.path().map(|x| x.to_string_lossy().into_owned()).unwrap_or("(unknown)".to_string()); eprintln!("Not processing {}: {}", path, e.to_string()); self.next() - } + }, } } } |