1use std::{
2 collections::{hash_map::Entry, HashMap},
3 ffi::OsString,
4 os::unix::fs::MetadataExt,
5 path::PathBuf,
6 sync::Mutex,
7};
8
9use anyhow::{anyhow, Context, Result};
10
11use crate::io::file_path_type::FileType;
12
/// Formats a byte count as GiB with three decimals, e.g. `"  1.500"`.
///
/// The integer part is right-aligned in a field of at least three
/// characters, so all values below 1000 GiB produce strings of the same
/// width and line up in columns. Larger values simply grow to the left.
///
/// The byte count is first truncated to KiB and biased by 512 KiB before
/// being scaled, which makes the last printed digit approximately rounded
/// to nearest instead of truncated.
pub fn bytes_to_gib_string(bytes: u64) -> String {
    // Work in thousandths so that everything stays in integer arithmetic.
    // This cannot overflow: (u64::MAX / 1024 + 512) * 1000 < u64::MAX.
    let mib_times_1000 = (bytes / 1024 + 512) * 1000 / 1024;
    let gib_times_1000 = mib_times_1000 / 1024;
    // One format for every magnitude keeps the integer part padded to the
    // same width whether or not it is zero.
    format!(
        "{:>3}.{:03}",
        gib_times_1000 / 1000,
        gib_times_1000 % 1000
    )
}

#[test]
fn t_bytes_to_gib_string() {
    let t = bytes_to_gib_string;
    assert_eq!(t(0), "  0.000");
    assert_eq!(t(20000), "  0.000");
    assert_eq!(t(500000), "  0.000");
    assert_eq!(t(600000), "  0.001");
    assert_eq!(t(1024 * 1024 * 1024), "  1.000");
    assert_eq!(t(512 * 1024 * 1024), "  0.500");
    assert_eq!(t(10 * 1024 * 1024 * 1024), " 10.000");
    assert_eq!(t(900 * 1024 * 1024 * 1024), "900.000");
    // Every value below 1000 GiB has the same width.
    assert_eq!(t(0).len(), t(900 * 1024 * 1024 * 1024).len());
}
39
40pub fn bytes_to_kb(bytes: u64) -> u64 {
42 (bytes + 1023) / 1024
43}
44
/// Scales `val` down by repeated division by `powers` and returns the
/// scaled value together with the matching unit prefix.
///
/// * `powers` - the base between two adjacent prefixes, e.g. `1000` or
///   `1024`.
/// * `si` - selects the spelling of the first prefix: `"k"` when true,
///   `"K"` otherwise. All other prefixes are the same in both modes.
/// * `val` - the value to scale.
///
/// Each division rounds to nearest (ties up). Scaling only continues while
/// the rounded quotient is greater than 10, so a returned value keeps at
/// least two significant digits; e.g. `1_500_000` with `powers = 1000`
/// yields `(1500, "k")`, not `(2, "M")`.
///
/// The function never panics: a `powers` below 2 cannot scale anything and
/// returns `val` unchanged with an empty prefix, and scaling stops at the
/// largest known prefix (`"Q"`).
pub fn to_human_readable(
    powers: u64,
    si: bool,
    mut val: u64,
) -> (u64, &'static str) {
    /// Keep scaling only while the quotient stays above this value.
    const THRESHOLD: u64 = 10;
    /// Prefix for each number of divisions performed.
    const PREFIXES: [&str; 11] =
        ["", "k", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q"];

    // Dividing by 0 panics and dividing by 1 never makes progress.
    if powers < 2 {
        return (val, "");
    }

    // Smallest remainder that rounds the quotient up. Comparing remainders
    // is equivalent to `(val + powers / 2) / powers` but cannot overflow.
    let round_up_from = powers - powers / 2;

    let mut n = 0;
    while n + 1 < PREFIXES.len() {
        let scaled = val / powers + u64::from(val % powers >= round_up_from);
        if scaled <= THRESHOLD {
            break;
        }
        val = scaled;
        n += 1;
    }

    let unit = if n == 1 && !si { "K" } else { PREFIXES[n] };
    (val, unit)
}
87
/// Size in bytes of one block as counted by `MetadataExt::blocks`, which
/// the standard library documents as reporting 512-byte units. Multiplied
/// with the block count to get a file's allocated size.
const BLOCKSIZE: u64 = 512;
90
91pub struct ItemError {
92 pub file_type: FileType,
93 pub file_name: OsString,
94 pub error: String,
95}
96
/// The result of scanning one directory, including the results for its
/// subdirectories.
pub struct DirDiskUsage {
    /// The directory that was scanned.
    pub path: PathBuf,
    /// Bytes attributed directly to this directory: the allocated size of
    /// its entries, excluding the files listed in `shared_files` and
    /// excluding the contents of subdirectories.
    pub file_bytes: u64,
    /// Keys of the entries in this directory that have more than one hard
    /// link and whose size is tracked in the shared inodes table instead
    /// of in `file_bytes`.
    pub shared_files: Vec<InodeKey>,
    /// Scan results of the subdirectories; `Err` for those that could not
    /// be scanned. The results are pushed by parallel tasks, so do not
    /// rely on their order.
    pub subdirs: Vec<Result<DirDiskUsage>>,
    /// Entries of this directory whose metadata could not be read.
    pub errors: Vec<ItemError>,
}
115
116impl DirDiskUsage {
117 pub fn total_files(
119 &self,
120 shared_inodes: &HashMap<InodeKey, InodeData>,
121 ) -> u64 {
122 self.file_bytes
123 + self
124 .shared_files
125 .iter()
126 .map(|inode_key| {
127 shared_inodes.get(inode_key).expect(
128 "given correct shared_inodes table, entries are always present"
129 ).bytes_share_rounded()
130 })
131 .sum::<u64>()
132 }
133
134 pub fn total_subdirs(
136 &self,
137 shared_inodes: &HashMap<InodeKey, InodeData>,
138 ) -> u64 {
139 self.subdirs
140 .iter()
141 .map(|result| -> u64 {
142 match result {
143 Ok(du) => du.total(shared_inodes),
144 Err(_) => 0,
145 }
146 })
147 .sum()
148 }
149
150 pub fn total(&self, shared_inodes: &HashMap<InodeKey, InodeData>) -> u64 {
152 self.total_files(shared_inodes) + self.total_subdirs(shared_inodes)
153 }
154
155 pub fn get_errors(&self, limit: usize, out: &mut Vec<String>) {
157 for ItemError {
158 file_type,
159 file_name,
160 error,
161 } in &self.errors
162 {
163 if out.len() >= limit {
164 return;
165 }
166 out.push(format!(
167 "{file_type:?} item {file_name:?} in {:?}: {error:#}",
168 self.path
169 ));
170 }
171 for subdir in &self.subdirs {
172 if out.len() >= limit {
173 return;
174 }
175 match subdir {
176 Ok(du) => du.get_errors(limit, out),
177 Err(error) => {
178 out.push(format!("{error:#}",));
179 }
180 }
181 }
182 }
183}
184
/// Identifies an inode: an inode number is only unique within one device,
/// so both values are needed as the key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InodeKey {
    /// Id of the device containing the file.
    pub dev: u64,
    /// Inode number on that device.
    pub inode: u64,
}
190
/// Size and sharing information for one inode that is referenced through
/// more than one hard link.
#[derive(Debug)]
pub struct InodeData {
    /// Allocated size of the inode in bytes.
    pub bytes: u64,
    /// Number of directory entries referring to this inode that share its
    /// size. Must not be zero.
    pub share_count: u64,
}

impl InodeData {
    /// The part of `bytes` attributed to each of the `share_count`
    /// sharers: `bytes / share_count`, rounded to nearest (ties up).
    ///
    /// With a `share_count` of 1 this is exactly `bytes`.
    ///
    /// # Panics
    ///
    /// Panics with a division by zero if `share_count` is 0.
    pub fn bytes_share_rounded(&self) -> u64 {
        let whole = self.bytes / self.share_count;
        let rest = self.bytes % self.share_count;
        // Round up once the remainder reaches half of the divisor
        // (i.e. ceil(share_count / 2)). Comparing remainders avoids the
        // overflow that adding a bias to `bytes` could cause.
        let round_up = rest >= self.share_count - self.share_count / 2;
        whole + u64::from(round_up)
    }
}

#[test]
fn t_bytes_share_rounded() {
    let t = |bytes, share_count| {
        InodeData { share_count, bytes }.bytes_share_rounded()
    };
    assert_eq!(t(5, 3), 2);
    assert_eq!(t(5, 4), 1);
    assert_eq!(t(6, 3), 2);
    assert_eq!(t(6, 2), 3);
    assert_eq!(t(6, 4), 2);
    assert_eq!(t(6, 5), 1);
    // A single sharer gets exactly the whole size.
    assert_eq!(t(0, 1), 0);
    assert_eq!(t(7, 1), 7);
    // One third rounds down, two thirds round up.
    assert_eq!(t(4, 3), 1);
    assert_eq!(t(1024, 3), 341);
    // No overflow at the top of the range.
    assert_eq!(t(u64::MAX, 2), 1 << 63);
}
221
/// Settings and shared state for a disk usage scan.
pub struct GetDirDiskUsage {
    /// When true, subdirectories that are on a different device than
    /// their parent are neither counted nor descended into.
    pub one_file_system: bool,
    /// Selects how files with more than one hard link are accounted for.
    /// When true, each link is immediately charged the file size divided
    /// by the file's link count. When false, the file is recorded in
    /// `shared_inodes` and its size is divided among the links that were
    /// actually seen during the scan.
    pub share_globally: bool,
    /// Size and number of links seen so far for every multiply-linked
    /// file encountered while `share_globally` is false. Behind a mutex
    /// because subdirectories are scanned in parallel.
    pub shared_inodes: Mutex<HashMap<InodeKey, InodeData>>,
}
227
228impl GetDirDiskUsage {
229 pub fn dir_disk_usage(
230 &self,
231 path: PathBuf,
232 current_dev: u64,
233 ) -> Result<DirDiskUsage> {
234 let items = std::fs::read_dir(&path)
235 .with_context(|| anyhow!("opening directory {path:?}"))?;
236 let mut file_bytes = 0;
237 let mut errors = vec![];
238 let mut shared_files = vec![];
239 let subdirs = Mutex::new(vec![]);
240 rayon::scope(|scope| -> Result<()> {
241 for item in items {
242 let item =
243 item.with_context(|| anyhow!("reading items in {path:?}"))?;
244 let file_name = item.file_name();
245 match item.metadata() {
246 Ok(metadata) => {
247 let blocks = metadata.blocks();
249 let blocksize = BLOCKSIZE; let mut inc_file_bytes = || {
252 file_bytes += blocks * blocksize;
253 };
254
255 if metadata.is_dir() {
256 let new_dev = metadata.dev();
257
258 if (!self.one_file_system) || new_dev == current_dev
259 {
260 inc_file_bytes();
266
267 let mut path = path.clone();
269 path.push(&file_name);
270 let subdirs = &subdirs;
271 scope.spawn(move |_| {
272 let result =
273 self.dir_disk_usage(path, new_dev);
274 subdirs
275 .lock()
276 .expect("no crash")
277 .push(result);
278 });
279 }
280 } else {
281 let nlink = metadata.nlink();
282 if nlink > 1 && blocks > 0 {
283 if self.share_globally {
284 file_bytes += (blocks * blocksize
285 + (nlink + 1) / 2)
286 / nlink;
287 } else {
288 let key = InodeKey {
289 dev: metadata.dev(),
290 inode: metadata.ino(),
291 };
292
293 shared_files.push(key.clone());
294
295 let mut shared = self
296 .shared_inodes
297 .lock()
298 .expect("no crash");
299 match shared.entry(key) {
300 Entry::Occupied(mut o) => {
301 let data = o.get_mut();
302 data.share_count += 1;
303 }
304 Entry::Vacant(v) => {
305 v.insert(InodeData {
306 share_count: 1,
307 bytes: blocks * blocksize,
308 });
309 }
310 }
311 }
312 } else {
313 inc_file_bytes()
314 }
315 }
316 }
317 Err(e) => {
318 let file_type: FileType = (&item.file_type()?).into();
320 errors.push(ItemError {
321 file_type,
322 file_name,
323 error: format!("{e:#}"),
324 });
325 }
326 }
327 }
328 Ok(())
329 })?;
330
331 let subdirs = subdirs.into_inner().expect("no crash either");
332
333 Ok(DirDiskUsage {
334 path,
335 file_bytes,
336 shared_files,
337 subdirs,
338 errors,
339 })
340 }
341}