From af572aaa4c989afe61ad9f275e419479fa273a07 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Sat, 16 Mar 2019 11:02:12 +0100 Subject: [PATCH] pxar: implement hardlinks So we are no longer compatible with catar ... --- src/pxar.rs | 8 ++++ src/pxar/encoder.rs | 68 +++++++++++++++++--------- src/pxar/format_definition.rs | 3 ++ src/pxar/sequential_decoder.rs | 87 ++++++++++++++++++++++++++-------- 4 files changed, 125 insertions(+), 41 deletions(-) diff --git a/src/pxar.rs b/src/pxar.rs index aba57fa5..a488351e 100644 --- a/src/pxar.rs +++ b/src/pxar.rs @@ -38,6 +38,14 @@ //! * ... //! * GOODBYE -- lookup table at the end of a list of directory entries +//! The original format has no way to deal with hardlinks, so we +//! extended the format by a special HARDLINK tag, which can replace +//! an ENTRY tag. The HARDLINK tag contains a 64-bit offset which +//! points to the linked ENTRY inside the archive, followed by the +//! full path name of that ENTRY. HARDLINKs may not have further data +//! (user, group, acl, ...) because this is already defined by the +//! linked ENTRY. + mod binary_search_tree; pub use binary_search_tree::*; diff --git a/src/pxar/encoder.rs b/src/pxar/encoder.rs index 2044ff99..ae87ff5a 100644 --- a/src/pxar/encoder.rs +++ b/src/pxar/encoder.rs @@ -44,7 +44,7 @@ pub struct Encoder<'a, W: Write> { all_file_systems: bool, root_st_dev: u64, verbose: bool, - hardlinks: HashMap, + hardlinks: HashMap, } impl <'a, W: Write> Encoder<'a, W> { @@ -355,35 +355,47 @@ impl <'a, W: Write> Encoder<'a, W> { } else if ifmt == libc::S_IFREG { + let mut hardlink_target = None; + if stat.st_nlink > 1 { let link_info = HardLinkInfo { st_dev: stat.st_dev, st_ino: stat.st_ino }; - if let Some(target) = self.hardlinks.get(&link_info) { - // fixme: store hardlink info somwhow? 
- eprintln!("FOUND HARDLINK {:?}", target); - } else { - self.hardlinks.insert(link_info, self.relative_path.clone()); + hardlink_target = self.hardlinks.get(&link_info).map(|(v, offset)| { + let mut target = v.clone().into_os_string(); + target.push("\0"); // add Nul byte + (target, (start_pos as u64) - offset) + }); + if hardlink_target == None { + self.hardlinks.insert(link_info, (self.relative_path.clone(), start_pos as u64)); } } - let filefd = match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) { - Ok(filefd) => filefd, - Err(nix::Error::Sys(Errno::ENOENT)) => { - self.report_vanished_file(&self.full_path())?; - continue; - }, - Err(err) => bail!("open file {:?} failed - {}", self.full_path(), err), - }; + if let Some((target, offset)) = hardlink_target { + + self.write_filename(&filename)?; + self.encode_hardlink(target.as_bytes(), offset)?; - let child_magic = if dir_stat.st_dev != stat.st_dev { - detect_fs_type(filefd)? } else { - magic - }; - self.write_filename(&filename)?; - let res = self.encode_file(filefd, &stat, child_magic); - let _ = nix::unistd::close(filefd); // ignore close errors - res?; + let filefd = match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) { + Ok(filefd) => filefd, + Err(nix::Error::Sys(Errno::ENOENT)) => { + self.report_vanished_file(&self.full_path())?; + continue; + }, + Err(err) => bail!("open file {:?} failed - {}", self.full_path(), err), + }; + + let child_magic = if dir_stat.st_dev != stat.st_dev { + detect_fs_type(filefd)? 
+ } else { + magic + }; + + self.write_filename(&filename)?; + let res = self.encode_file(filefd, &stat, child_magic); + let _ = nix::unistd::close(filefd); // ignore close errors + res?; + } } else if ifmt == libc::S_IFLNK { let mut buffer = [0u8; libc::PATH_MAX as usize]; @@ -540,6 +552,18 @@ impl <'a, W: Write> Encoder<'a, W> { Ok(()) } + fn encode_hardlink(&mut self, target: &[u8], offset: u64) -> Result<(), Error> { + + //println!("encode_hardlink: {:?} -> {:?}", self.full_path(), target); + + // Note: HARDLINK replaces an ENTRY. + self.write_header(PXAR_FORMAT_HARDLINK, (target.len() as u64) + 8)?; + self.write_item(offset)?; + self.write(target)?; + + Ok(()) + } + // the report_XXX method may raise and error - depending on encoder configuration fn report_vanished_file(&self, path: &Path) -> Result<(), Error> { diff --git a/src/pxar/format_definition.rs b/src/pxar/format_definition.rs index b03233e9..02aa25de 100644 --- a/src/pxar/format_definition.rs +++ b/src/pxar/format_definition.rs @@ -15,6 +15,9 @@ pub const CA_FORMAT_FILENAME: u64 = 0x6dbb6ebcb3161f0b; pub const CA_FORMAT_SYMLINK: u64 = 0x664a6fb6830e0d6c; pub const CA_FORMAT_DEVICE: u64 = 0xac3dace369dfe643; +// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__"); +pub const PXAR_FORMAT_HARDLINK: u64 = 0x2c5e06f634f65b86; + pub const CA_FORMAT_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9; pub const CA_FORMAT_GOODBYE: u64 = 0xdfd35c5e8327c403; diff --git a/src/pxar/sequential_decoder.rs b/src/pxar/sequential_decoder.rs index 22663e5f..082cbe24 100644 --- a/src/pxar/sequential_decoder.rs +++ b/src/pxar/sequential_decoder.rs @@ -54,14 +54,14 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { Ok(result.from_le()) } - fn read_symlink(&mut self, size: u64) -> Result { + fn read_link(&mut self, size: u64) -> Result { if size < (HEADER_SIZE + 2) { - bail!("dectected short symlink target."); + bail!("dectected short link target."); } let target_len = size - HEADER_SIZE; if target_len > (libc::PATH_MAX as u64) { - 
bail!("symlink target too long ({}).", target_len); + bail!("link target too long ({}).", target_len); } let mut buffer = vec![0u8; target_len as usize]; @@ -69,12 +69,31 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { let last_byte = buffer.pop().unwrap(); if last_byte != 0u8 { - bail!("symlink target not nul terminated."); + bail!("link target not nul terminated."); } Ok(PathBuf::from(std::ffi::OsString::from_vec(buffer))) } + fn read_hardlink(&mut self, size: u64) -> Result<(PathBuf, u64), Error> { + if size < (HEADER_SIZE + 8 + 2) { + bail!("dectected short hardlink header."); + } + let offset: u64 = self.read_item()?; + let target = self.read_link(size - 8)?; + + for c in target.components() { + match c { + std::path::Component::Normal(_) => { /* OK */ }, + _ => { + bail!("hardlink target contains invalid component {:?}", c); + } + } + } + + Ok((target, offset)) + } + pub (crate) fn read_filename(&mut self, size: u64) -> Result { if size < (HEADER_SIZE + 2) { bail!("dectected short filename"); @@ -250,13 +269,15 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { Err(err) => bail!("unable to open target directory {:?} - {}", path, err), }; - self.restore_sequential(&mut path.to_owned(), &OsString::new(), &dir, callback) + let mut relative_path = PathBuf::new(); + self.restore_sequential(path, &mut relative_path, &OsString::new(), &dir, callback) } fn restore_sequential( &mut self, - path: &mut PathBuf, // used for error reporting - filename: &OsStr, // repeats path last component + base_path: &Path, + relative_path: &mut PathBuf, + filename: &OsStr, // repeats path last relative_path component parent: &nix::dir::Dir, callback: &F, ) -> Result<(), Error> @@ -265,12 +286,23 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { let parent_fd = parent.as_raw_fd(); - // read ENTRY first + let full_path = base_path.join(&relative_path); + + (callback)(&full_path)?; + let head: CaFormatHeader = self.read_item()?; + + if head.htype == PXAR_FORMAT_HARDLINK { + let (target, 
_offset) = self.read_hardlink(head.size)?; + let target_path = base_path.join(&target); + //println!("HARDLINK: {} {:?} -> {:?}", offset, full_path, target_path); + hardlink(&target_path, &full_path)?; + return Ok(()); + } + check_ca_header::(&head, CA_FORMAT_ENTRY)?; let entry: CaFormatEntry = self.read_item()?; - (callback)(path)?; let mode = entry.mode as u32; //fixme: upper 32bits? @@ -283,7 +315,7 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { } else { dir = match dir_mkdirat(parent_fd, filename, true) { Ok(dir) => dir, - Err(err) => bail!("unable to open directory {:?} - {}", path, err), + Err(err) => bail!("unable to open directory {:?} - {}", full_path, err), }; } @@ -291,10 +323,9 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { while head.htype == CA_FORMAT_FILENAME { let name = self.read_filename(head.size)?; - path.push(&name); - //println!("NAME: {:?}", path); - self.restore_sequential(path, &name, &dir, callback)?; - path.pop(); + relative_path.push(&name); + self.restore_sequential(base_path, relative_path, &name, &dir, callback)?; + relative_path.pop(); head = self.read_item()?; } @@ -316,7 +347,7 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { } if filename.is_empty() { - bail!("got empty file name at {:?}", path) + bail!("got empty file name at {:?}", full_path) } if ifmt == libc::S_IFLNK { @@ -326,10 +357,10 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { let head: CaFormatHeader = self.read_item()?; match head.htype { CA_FORMAT_SYMLINK => { - let target = self.read_symlink(head.size)?; + let target = self.read_link(head.size)?; //println!("TARGET: {:?}", target); if let Err(err) = symlinkat(&target, parent_fd, filename) { - bail!("create symlink {:?} failed - {}", path, err); + bail!("create symlink {:?} failed - {}", full_path, err); } } _ => { @@ -395,7 +426,7 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { let mut file = match file_openat(parent_fd, filename, flags, open_mode) { Ok(file) => file, - Err(err) => bail!("open file {:?} 
failed - {}", path, err), + Err(err) => bail!("open file {:?} failed - {}", full_path, err), }; let head = self.restore_attributes(&entry)?; @@ -454,6 +485,14 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { println!("{:?}", path); } + if head.htype == PXAR_FORMAT_HARDLINK { + let (target, offset) = self.read_hardlink(head.size)?; + if verbose { + println!("Hardlink: {} {:?}", offset, target); + } + return Ok(()); + } + check_ca_header::(&head, CA_FORMAT_ENTRY)?; let entry: CaFormatEntry = self.read_item()?; @@ -509,7 +548,7 @@ impl <'a, R: Read> SequentialDecoder<'a, R> { match head.htype { CA_FORMAT_SYMLINK => { - let target = self.read_symlink(head.size)?; + let target = self.read_link(head.size)?; if verbose { println!("Symlink: {:?}", target); } @@ -622,6 +661,16 @@ fn dir_mkdirat(parent: RawFd, filename: &OsStr, create_new: bool) -> Result Result<(), Error> { + oldpath.with_nix_path(|oldpath| { + newpath.with_nix_path(|newpath| { + let res = unsafe { libc::link(oldpath.as_ptr(), newpath.as_ptr()) }; + Errno::result(res)?; + Ok(()) + })? + })? +} + fn symlinkat(target: &Path, parent: RawFd, linkname: &OsStr) -> Result<(), Error> { target.with_nix_path(|target| {