evobench_tools/io_utils/
zstd_file.rs1use std::{
4 ffi::{OsStr, OsString},
5 fs::File,
6 io::Read,
7 os::unix::fs::MetadataExt,
8 path::Path,
9 process::{ChildStdout, Command, Stdio},
10};
11
12use anyhow::{Context, Result, anyhow, bail};
13use cj_path_util::unix::polyfill::add_extension;
14use memmap2::{Mmap, MmapOptions};
15use ruzstd::{FrameDecoder, StreamingDecoder};
16
17use crate::{ctx, io_utils::tempfile_utils::TempfileOptions};
18
/// Selects how `decompressed_file` decodes zstd-compressed input: when `true`
/// it spawns the external `zstd` command and reads the command's stdout; when
/// `false` it decodes in-process via `ruzstd`'s `StreamingDecoder`.
const USING_EXTERNAL_TOOL: bool = false;
21
/// Classification of a path by its final file extension, as computed by
/// `file_extension`.
///
/// The enum is fieldless, so it is cheap to copy and has a total equality;
/// `Clone`, `Copy` and `Eq` are derived alongside `PartialEq` accordingly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Extension {
    /// The final extension is `zstd`.
    ZStd,
    /// The final extension is anything other than `zstd`.
    Other,
}
27
28fn file_extension<P: AsRef<Path>>(path: P, expected_suffix: Option<&str>) -> Result<Extension> {
32 let path = path.as_ref();
33 let ext = path.extension().ok_or_else(|| {
34 let _hold;
35 let extension_msg = if let Some(expected_suffix) = expected_suffix {
36 _hold = format!("{expected_suffix:?}");
37 &_hold
38 } else {
39 "any extension"
40 };
41 anyhow!("missing file extension, expecting {extension_msg} or \"zstd\": {path:?}")
42 })?;
43
44 match ext.to_string_lossy().as_ref() {
45 "zstd" => {
46 if let Some(expected_suffix) = expected_suffix {
47 let stem = path.with_extension("");
48 let ext2 = stem.extension().ok_or_else(|| {
49 anyhow!(
50 "missing second file extension, after \"zstd\", \
51 expecting {expected_suffix:?}: {path:?}"
52 )
53 })?;
54 match ext2.to_string_lossy().as_ref() {
55 s if &*s == expected_suffix => Ok(Extension::ZStd),
56 _ => bail!(
57 "unknown second file extension {ext2:?} after \"zstd\", \
58 expecting {expected_suffix:?}: {path:?}"
59 ),
60 }
61 } else {
62 Ok(Extension::ZStd)
63 }
64 }
65 ext_str => {
66 if let Some(expected_suffix) = expected_suffix {
67 if ext_str == expected_suffix {
68 Ok(Extension::Other)
69 } else {
70 bail!(
71 "unknown file extension {ext:?}, expecting {expected_suffix:?} \
72 or \"zstd\": {path:?}"
73 )
74 }
75 } else {
76 Ok(Extension::Other)
77 }
78 }
79 }
80}
81
/// Table-driven checks of `file_extension` with an expected suffix: accepted
/// paths with their classification, and rejected paths with the exact error
/// message.
#[test]
fn t_file_extension() {
    use Extension::{Other, ZStd};

    // (path, expected suffix, classification)
    let accepted = [
        ("foo.x", "x", Other),
        ("foo.x.zstd", "x", ZStd),
        ("foo.z.x", "x", Other),
        ("foo.z.x.zstd", "x", ZStd),
    ];
    for (path, suffix, expected) in accepted {
        let got = file_extension(path, Some(suffix)).expect("test call should not give an error");
        assert_eq!(got, expected);
    }

    // (path, expected suffix, error message)
    let rejected = [
        (
            "foo.x",
            "y",
            "unknown file extension \"x\", expecting \"y\" or \"zstd\": \"foo.x\"",
        ),
        (
            "foo.x.zstd",
            "y",
            "unknown second file extension \"x\" after \"zstd\", expecting \"y\": \"foo.x.zstd\"",
        ),
        (
            "foo.zstd",
            "y",
            "missing second file extension, after \"zstd\", expecting \"y\": \"foo.zstd\"",
        ),
        (
            "foo",
            "y",
            "missing file extension, expecting \"y\" or \"zstd\": \"foo\"",
        ),
    ];
    for (path, suffix, message) in rejected {
        let error = file_extension(path, Some(suffix))
            .err()
            .expect("test call should give an error");
        assert_eq!(error.to_string(), message);
    }
}
115
116pub trait SendRead: Read + Send {}
117
118impl SendRead for StreamingDecoder<std::fs::File, FrameDecoder> {}
119impl SendRead for ChildStdout {}
120impl SendRead for File {}
121
122pub fn decompressed_file(path: &Path, expected_suffix: Option<&str>) -> Result<Box<dyn SendRead>> {
126 let ext = file_extension(path, expected_suffix)?;
127
128 let file_open = || File::open(path).with_context(|| anyhow!("opening file {path:?}"));
129
130 match ext {
131 Extension::ZStd => {
132 if USING_EXTERNAL_TOOL {
133 let mut c = Command::new("zstd");
134 let args: Vec<OsString> = vec!["-dcf".into(), "--".into(), path.into()];
135 c.args(args);
136 c.stdout(Stdio::piped());
137 let child = c.spawn().map_err(ctx!("spawning command {c:?}"))?;
138 Ok(Box::new(child.stdout.expect("present since configured")))
139 } else {
140 let input = file_open()?;
141 Ok(Box::new(
142 StreamingDecoder::new(input).map_err(ctx!("zstd-decoding {path:?}"))?,
143 ))
144 }
145 }
146 Extension::Other => Ok(Box::new(file_open()?)),
147 }
148}
149
150pub fn decompressed_file_mmap(
159 path: &Path,
160 uncompressed_path: Option<&Path>,
161 expected_suffix: Option<&str>,
162) -> Result<Mmap> {
163 let ext = file_extension(path, expected_suffix)?;
164
165 let file_open =
166 |path: &Path| File::open(path).with_context(|| anyhow!("opening file {path:?}"));
167
168 let tmp;
169 let uncompressed_path = match ext {
170 Extension::ZStd => {
171 let uncompressed_path = if let Some(uncompressed_path) = uncompressed_path {
172 uncompressed_path.to_owned()
173 } else {
174 add_extension(path, "uncompressed")
175 .ok_or_else(|| anyhow!("appending extension to {path:?}"))?
176 };
177 if !uncompressed_path.exists() {
178 let tmp = TempfileOptions {
179 target_path: uncompressed_path.clone(),
180 retain_tempfile: false,
181 migrate_access: false,
182 }
183 .tempfile()?;
184
185 let mut c = Command::new("zstd");
186 let args: Vec<OsString> = vec![
187 "-df".into(),
188 "--quiet".into(),
189 "-o".into(),
190 tmp.temp_path().into(),
191 "--".into(),
192 path.into(),
193 ];
194 c.args(args);
195 let mut child = c.spawn().map_err(ctx!("spawning command {c:?}"))?;
196 let status = child.wait()?;
197 if !status.success() {
198 bail!("{c:?} failed: {status}");
199 }
200 tmp.finish()?;
201 }
202 tmp = uncompressed_path;
203 &tmp
204 }
205 Extension::Other => path,
206 };
207
208 let input = file_open(&uncompressed_path)?;
209
210 let meta = input.metadata()?;
211 let size: usize = meta.size().try_into()?;
212 unsafe {
213 MmapOptions::new().huge(Some(21)).len(size).map(&input)
215 }
216 .map_err(ctx!("mmap for file {uncompressed_path:?}"))
217}
218
219pub fn compress_file(source_path: &Path, target_path: &Path, quiet: bool) -> Result<()> {
223 let mut c = Command::new("zstd");
224 if quiet {
225 c.arg("--quiet");
226 c.stdout(Stdio::piped());
227 c.stderr(Stdio::piped());
228 }
229 let args: &[&OsStr] = &[
230 "-o".as_ref(),
231 target_path.as_ref(),
233 "--".as_ref(),
234 source_path.as_ref(),
235 ];
236 c.args(args);
237 let output = c.output().map_err(ctx!("running command {c:?}"))?;
238 let status = output.status;
239 if status.success() {
240 Ok(())
241 } else {
242 let outputs = if quiet {
243 let mut outputs = String::from_utf8_lossy(&output.stdout).into_owned();
244 outputs.push_str(&String::from_utf8_lossy(&output.stderr));
245 format!("{outputs:?}")
246 } else {
247 "not captured".into()
248 };
249 bail!("running zstd {args:?}: {status} with outputs {outputs}")
250 }
251}