Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions .vscode/cspell.dictionaries/acronyms+names.wordlist.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# * abbreviations / acronyms
aarch
AIX
ASLR # address space layout randomization
AST # abstract syntax tree
Expand All @@ -9,35 +10,34 @@ DevOps
Ext3
FIFO
FIFOs
flac
FQDN # fully qualified domain name
GID # group ID
GIDs
GNU
GNUEABI
GNUEABIhf
impls
JFS
loongarch
lzma
MSRV # minimum supported rust version
MSVC
NixOS
POSIX
POSIXLY
ReiserFS
RISC
RISCV
RNG # random number generator
RNGs
ReiserFS
Solaris
UID # user ID
UIDs
UUID # universally unique identifier
WASI
WASM
XFS
aarch
flac
impls
lzma
loongarch

# * names
BusyBox
Expand All @@ -48,25 +48,23 @@ Deno
EditorConfig
EPEL
FreeBSD
genric
Gmail
GNU
Illumos
Irix
libfuzzer
MS-DOS
MSDOS
MacOS
MinGW
Minix
MS-DOS
MSDOS
NetBSD
Novell
Nushell
OpenBSD
POSIX
PowerPC
SELinux
SkyPack
Solaris
SysV
Xenix
Yargs
132 changes: 120 additions & 12 deletions src/uu/stat/src/stat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,56 @@ fn pad_and_print(result: &str, left: bool, width: usize, padding: Padding) {
}
}

/// Writes `bytes` to `writer`, space-padded to at least `width` bytes.
///
/// The bytes are written verbatim, so data that is not valid UTF-8 is
/// emitted unchanged rather than being converted to a string first.
///
/// # Arguments
///
/// * `writer` - Destination for the output (e.g. `std::io::stdout()` or a `Vec<u8>`).
/// * `bytes` - The raw bytes to print.
/// * `left` - When `true` the bytes come first and the padding follows
///   (left-aligned); otherwise the padding comes first (right-aligned).
/// * `width` - Minimum field width, counted in bytes.
/// * `precision` - `Precision::Number(p)` with `p` smaller than `bytes.len()`
///   truncates the output to the first `p` bytes; any other value prints all bytes.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
fn pad_and_print_bytes<W: Write>(
    mut writer: W,
    bytes: &[u8],
    left: bool,
    width: usize,
    precision: Precision,
) -> Result<(), std::io::Error> {
    // Truncation is byte-based: only a precision shorter than the input cuts it.
    let shown = match precision {
        Precision::Number(limit) if limit < bytes.len() => &bytes[..limit],
        _ => bytes,
    };

    // Zero when the (possibly truncated) data already fills the field;
    // `print_padding` writes nothing in that case.
    let fill = width.saturating_sub(shown.len());

    if left {
        writer.write_all(shown)?;
        print_padding(&mut writer, fill)
    } else {
        print_padding(&mut writer, fill)?;
        writer.write_all(shown)
    }
}

/// Writes `n` ASCII space characters to `writer`.
///
/// `writer` is generic over any [`Write`] implementor, such as
/// `std::io::stdout()` or an in-memory `Vec<u8>`; `n` is the already
/// calculated padding size.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by `writer`.
fn print_padding<W: Write>(writer: &mut W, n: usize) -> Result<(), std::io::Error> {
    // One single-byte write per padding column, short-circuiting on failure.
    (0..n).try_for_each(|_| writer.write_all(b" "))
}

#[derive(Debug)]
pub enum OutputType {
pub enum OutputType<'a> {
Str(String),
OsStr(&'a OsString),
Integer(i64),
Unsigned(u64),
UnsignedHex(u64),
Expand Down Expand Up @@ -306,6 +353,7 @@ fn print_it(output: &OutputType, flags: Flags, width: usize, precision: Precisio

match output {
OutputType::Str(s) => print_str(s, &flags, width, precision),
OutputType::OsStr(s) => print_os_str(s, &flags, width, precision),
OutputType::Integer(num) => print_integer(*num, &flags, width, precision, padding_char),
OutputType::Unsigned(num) => print_unsigned(*num, &flags, width, precision, padding_char),
OutputType::UnsignedOct(num) => {
Expand Down Expand Up @@ -354,6 +402,37 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Precision) {
pad_and_print(s, flags.left, width, Padding::Space);
}

/// Prints an `OsString` to standard output honouring flags, width and precision.
///
/// On Unix targets the string's raw bytes are written through
/// `pad_and_print_bytes`, so names that are not valid UTF-8 are emitted
/// unchanged. If that write reports an error, the value is printed again via
/// `print_str` using its lossy string conversion.
///
/// On non-Unix targets the lossy string conversion is always used.
///
/// # Arguments
///
/// * `s` - The `OsString` to be printed.
/// * `flags` - A reference to the Flags struct containing formatting flags
///   (only `flags.left` is read directly here).
/// * `width` - The width of the field for the printed string.
/// * `precision` - Optional precision, forwarded to the byte or string printer.
fn print_os_str(s: &OsString, flags: &Flags, width: usize, precision: Precision) {
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;

        let raw_write =
            pad_and_print_bytes(std::io::stdout(), s.as_bytes(), flags.left, width, precision);
        if raw_write.is_ok() {
            return;
        }
        // The raw byte write failed: fall through to the lossy string path
        // below so that something printable is still attempted.
    }

    let lossy = s.to_string_lossy();
    print_str(&lossy, flags, width, precision);
}

fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String {
match quoting_style {
QuotingStyle::Locale | QuotingStyle::Shell => {
Expand Down Expand Up @@ -890,16 +969,12 @@ impl Stater {
})
}

fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<String> {
fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<&OsString> {
let path = p.as_ref().canonicalize().ok()?;

for root in self.mount_list.as_ref()? {
if path.starts_with(root) {
// TODO: This is probably wrong, we should pass the OsString
return Some(root.to_string_lossy().into_owned());
}
}
None
self.mount_list
.as_ref()?
.iter()
.find(|root| path.starts_with(root))
}

fn exec(&self) -> i32 {
Expand Down Expand Up @@ -993,8 +1068,11 @@ impl Stater {
'h' => OutputType::Unsigned(meta.nlink()),
// inode number
'i' => OutputType::Unsigned(meta.ino()),
// mount point: TODO: This should be an OsStr
'm' => OutputType::Str(self.find_mount_point(file).unwrap()),
// mount point
'm' => match self.find_mount_point(file) {
Some(s) => OutputType::OsStr(s),
None => OutputType::Str(String::new()),
},
// file name
'n' => OutputType::Str(display_name.to_string()),
// quoted file name with dereference if symbolic link
Expand Down Expand Up @@ -1300,6 +1378,8 @@ fn pretty_time(meta: &Metadata, md_time_field: MetadataTimeField) -> String {

#[cfg(test)]
mod tests {
use crate::{pad_and_print_bytes, print_padding};

use super::{Flags, Precision, ScanUtil, Stater, Token, group_num, precision_trunc};

#[test]
Expand Down Expand Up @@ -1421,4 +1501,32 @@ mod tests {
assert_eq!(precision_trunc(123.456, Precision::Number(4)), "123.4560");
assert_eq!(precision_trunc(123.456, Precision::Number(5)), "123.45600");
}

#[test]
fn test_pad_and_print_bytes() {
    // Non-UTF-8 input whose length equals the width: written unchanged, no padding.
    let mut buffer = Vec::new();
    let bytes = b"\x80\xFF\x80";
    pad_and_print_bytes(&mut buffer, bytes, false, 3, Precision::NotSpecified).unwrap();
    assert_eq!(&buffer, b"\x80\xFF\x80");

    // Padding on the left (`left == false`): width 5 minus 3 bytes gives two
    // leading spaces.
    let mut buffer = Vec::new();
    let bytes = b"\x80\xFF\x80";
    pad_and_print_bytes(&mut buffer, bytes, false, 5, Precision::NotSpecified).unwrap();
    assert_eq!(&buffer, b"  \x80\xFF\x80");

    // Padding on the right (`left == true`): the same two spaces follow the bytes.
    let mut buffer = Vec::new();
    let bytes = b"\x80\xFF\x80";
    pad_and_print_bytes(&mut buffer, bytes, true, 5, Precision::NotSpecified).unwrap();
    assert_eq!(&buffer, b"\x80\xFF\x80  ");

    // A precision shorter than the input truncates it to that many bytes.
    let mut buffer = Vec::new();
    let bytes = b"\x80\xFF\x80";
    pad_and_print_bytes(&mut buffer, bytes, false, 0, Precision::Number(2)).unwrap();
    assert_eq!(&buffer, b"\x80\xFF");
}

#[test]
fn test_print_padding() {
    // Requesting 5 columns of padding must produce exactly five spaces.
    let mut buffer = Vec::new();
    print_padding(&mut buffer, 5).unwrap();
    assert_eq!(&buffer, b"     ");
}
}
46 changes: 46 additions & 0 deletions tests/by-util/test_stat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,3 +514,49 @@ fn test_stat_selinux() {
let s: Vec<_> = result.stdout_str().split(':').collect();
assert!(s.len() == 4);
}

/// `%m` for the root directory prints a non-empty mount point equal to `/`.
#[cfg(unix)]
#[test]
fn test_mount_point_basic() {
    let scenario = TestScenario::new(util_name!());
    let outcome = scenario.ucmd().args(&["-c", "%m", "/"]).succeeds();

    // Surrounding whitespace (such as a trailing newline) is ignored.
    let mount_point = outcome.stdout_str().trim();

    assert!(!mount_point.is_empty(), "Mount point should not be empty");
    assert_eq!(mount_point, "/");
}

/// `%15m` and `%-15m` pad the mount point to 15 bytes on the correct side.
///
/// The expected line is rebuilt from the mount point actually printed, so the
/// test checks both the field width and which side the padding is on.
#[cfg(unix)]
#[test]
fn test_mount_point_width_and_alignment() {
    const WIDTH: usize = 15;
    let ts = TestScenario::new(util_name!());

    // Right-aligned, width 15: padding comes first, then the mount point.
    let result = ts.ucmd().args(&["-c", "%15m", "/"]).succeeds();
    let output = result.stdout_str();
    // Drop a single trailing newline, if any, before inspecting the field.
    let line = output.strip_suffix('\n').unwrap_or(output);
    let mount = line.trim();
    let fill = " ".repeat(WIDTH.saturating_sub(mount.len()));
    assert_eq!(
        line,
        format!("{fill}{mount}"),
        "Output should be padded to width 15"
    );

    // Left-aligned, width 15: mount point comes first, then the padding.
    let result = ts.ucmd().args(&["-c", "%-15m", "/"]).succeeds();
    let output = result.stdout_str();
    let line = output.strip_suffix('\n').unwrap_or(output);
    let mount = line.trim();
    let fill = " ".repeat(WIDTH.saturating_sub(mount.len()));
    assert_eq!(
        line,
        format!("{mount}{fill}"),
        "Output should be padded to width 15 (left-aligned)"
    );
}

/// `%m` can be combined with other format specifiers in a single format string.
#[cfg(unix)]
#[test]
fn test_mount_point_combined_with_other_specifiers() {
    let scenario = TestScenario::new(util_name!());
    let outcome = scenario
        .ucmd()
        .args(&["-c", "%m %n %s", "/bin/sh"])
        .succeeds();

    // Expect at least one whitespace-separated field per specifier.
    let field_count = outcome.stdout_str().split_whitespace().count();
    assert!(
        field_count >= 3,
        "Should print mount point, file name, and size"
    );
}
Loading