chj_rustbin/io/
file_path_type.rs

1use std::fs;
2use std::path::Path;
3use std::{ffi::OsString, fmt::Debug, path::PathBuf};
4
5use anyhow::{anyhow, Context, Result};
6use genawaiter::sync::Gen;
7use log::trace;
8
9use crate::io::excludes::Excludes;
10use crate::region::{Region, RegionId};
11use crate::scope;
12
/// Get filesystem items while making use of the excludes module.
///
/// `ItemOptions` selects which kinds of directory entries the listing
/// functions in this module yield.
#[derive(Debug, Clone, Copy)]
pub struct ItemOptions {
    /// Yield directories (subject to the excludes). The recursive
    /// iterator always descends into directories; this flag only
    /// controls whether the directories themselves are yielded.
    pub dirs: bool,
    /// Yield regular files (subject to the excludes).
    pub files: bool,
    /// Yield everything that is neither a directory nor a regular
    /// file (this includes symlinks). The excludes are not consulted
    /// for these entries.
    pub other: bool,
}
21
/// Generates `impl From<&$from> for ItemOptions`.
///
/// The generated conversion copies the `dirs`, `files` and `other`
/// fields over one by one, so `$from` must have fields with exactly
/// these names, each of type `bool`.
#[macro_export]
macro_rules! impl_item_options_from {
    ($from:ty) => {
        impl From<&$from> for $crate::io::file_path_type::ItemOptions {
            fn from(source: &$from) -> Self {
                Self {
                    dirs: source.dirs,
                    files: source.files,
                    other: source.other,
                }
            }
        }
    };
}
39
/// The kind of a directory entry, as far as this module cares.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileType {
    /// A regular file; `File` does not include symlinks.
    File,
    /// A directory.
    Dir,
    /// A symbolic link (whatever it points to).
    Symlink,
    /// Anything that is not a file, directory or symlink.
    Other,
}

impl FileType {
    /// True only for `FileType::Dir`.
    pub fn is_dir(self) -> bool {
        matches!(self, FileType::Dir)
    }

    /// True only for `FileType::File`; does not include symlinks.
    pub fn is_file(self) -> bool {
        matches!(self, FileType::File)
    }
}
64
65impl From<&fs::FileType> for FileType {
66    fn from(ft: &fs::FileType) -> Self {
67        if ft.is_dir() {
68            Self::Dir
69        } else if ft.is_file() {
70            Self::File
71        } else if ft.is_symlink() {
72            Self::Symlink
73        } else {
74            Self::Other
75        }
76    }
77}
78
/// This trait is used to create parent nodes while walking
/// directories/trees. You need to implement it for your type (it is
/// already implemented for `PathBuf`).
pub trait FileParent<'region>: Sized {
    /// Create the parent node for the directory entry `item`. The
    /// recursive iterator calls this for every directory it is about
    /// to descend into and stores the result in `region`; `item`'s
    /// own parent can be looked up in `region` via
    /// `item.file_parent`.
    fn new(
        region: &Region<'region, Self>,
        item: &FilePathType<'region, Self>,
    ) -> Self;
    /// The filesystem path of this node. It is used to open the
    /// directory for reading and as the base onto which entry names
    /// are joined.
    fn path(&self) -> &Path;
}
89
90impl<'region> FileParent<'region> for PathBuf {
91    fn new(
92        region: &Region<'region, Self>,
93        item: &FilePathType<'region, Self>,
94    ) -> Self {
95        region.get(item.file_parent).clone().join(&item.file_name)
96    }
97
98    fn path(&self) -> &Path {
99        self
100    }
101}
102
/// One directory entry as yielded by the listing functions: its name,
/// its type, and a reference to the node describing the directory it
/// was found in. The full path is not stored; use `to_path_buf` to
/// build it.
#[derive(Debug, PartialEq)]
pub struct FilePathType<'region, P: FileParent<'region> = PathBuf> {
    /// Id of the parent directory's node inside the `Region`.
    pub file_parent: RegionId<'region, P>,
    /// Name of the entry within its parent directory (not a full
    /// path).
    pub file_name: OsString,
    /// Kind of the entry, taken from the directory entry itself.
    pub file_type: FileType,
}
109
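// Why do we need to implement Clone manually, when RegionId already
// does it, and P is the type about which the compiler complains; is
// derive(Clone) intentionally restricting it? (Yes: `derive(Clone)`
// adds a `P: Clone` bound to the generated impl for every type
// parameter, whether or not a `P` value is actually stored.)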
113impl<'region, P: FileParent<'region>> Clone for FilePathType<'region, P> {
114    fn clone(&self) -> Self {
115        Self {
116            file_parent: self.file_parent,
117            file_name: self.file_name.clone(),
118            file_type: self.file_type,
119        }
120    }
121}
122
123impl<'region, P: FileParent<'region>> FilePathType<'region, P> {
124    pub fn is_file(&self) -> bool {
125        self.file_type.is_file()
126    }
127    pub fn is_dir(&self) -> bool {
128        self.file_type.is_dir()
129    }
130    pub fn to_path_buf(&self, region: &Region<'region, P>) -> PathBuf {
131        // XX cache value?
132        region.get(self.file_parent).path().join(&self.file_name)
133    }
134}
135
136/// Does not descend into dirs. You'll want to use `PathBuf` for `P`
137/// unless you have a need to store additional data in the parent
138/// nodes.
139pub fn file_path_types_iter<'region, 't, P: FileParent<'t> + Send + Sync>(
140    region: &'t Region<'region, P>,
141    file_parent: RegionId<'region, P>,
142    opt: ItemOptions,
143    excludes: &'t Excludes,
144) -> Result<impl Iterator<Item = Result<FilePathType<'t, P>>> + Send + 't> {
145    // eprintln!("items({dir_path:?}, {opt:?})");
146    let iterator = scope!{ fs::read_dir(region.get(file_parent).path()) }.with_context(
147        || anyhow!("opening directory {:?} for reading", region.get(file_parent).path()))?
148    .filter_map(
149        move |entry_result: Result<fs::DirEntry, std::io::Error>|
150                                   -> Option<Result<FilePathType<P>>> {
151            match entry_result {
152                Ok(entry) => {
153                    let ft = entry.file_type()
154                        .expect("does this fail on OSes needing stat?");
155                    let file_name = entry.file_name();
156                    let handle_as_dir = ft.is_dir()
157                        && opt.dirs
158                        && ! excludes.filename_is_excluded(&file_name, true);
159                    let handle_as_file = ft.is_file()
160                        && opt.files
161                        && ! excludes.filename_is_excluded(&file_name, false);
162                    let handle_as_other = opt.other &&
163                        (!ft.is_dir() && !ft.is_file());
164                    if handle_as_dir || handle_as_file || handle_as_other {
165                        let file_type = FileType::from(&ft);
166                        Some(Ok(FilePathType { file_parent, file_name, file_type }))
167                    } else {
168                        trace!(
169                            "ignoring item '{:?}' (type {:?})",
170                            file_name, ft);
171                        None
172                    }
173                },
174                Err(e) =>
175                    Some(Err(e).with_context(|| anyhow!("read_dir on {:?}",
176                                                        region.get(file_parent).path())))
177            }
178        });
179    Ok(iterator)
180}
181
182/// Does not descend into dirs.
183pub fn file_path_types_vec<'region, 't, P: FileParent<'t> + Send + Sync>(
184    region: &'t Region<'region, P>,
185    file_parent: RegionId<'region, P>,
186    opt: ItemOptions,
187    excludes: &'t Excludes,
188    sorted: bool,
189) -> Result<Vec<FilePathType<'t, P>>> {
190    let mut vec: Vec<FilePathType<'t, P>> =
191        file_path_types_iter(region, file_parent, opt, excludes)?
192            .collect::<Result<_, _>>()?;
193    if sorted {
194        vec.sort_by(|a, b| a.file_name.cmp(&b.file_name));
195    }
196    Ok(vec)
197}
198
199/// Same API as `file_path_types_iter` but can yield sorted output
200/// (and in that case reports all file system errors directly from the
201/// call, items then always being Ok).
202pub fn file_path_types_sortable_iter<
203    'region,
204    't,
205    P: FileParent<'t> + Send + Sync,
206>(
207    region: &'t Region<'region, P>,
208    file_parent: RegionId<'region, P>,
209    opt: ItemOptions,
210    excludes: &'t Excludes,
211    sorted: bool,
212) -> Result<Box<dyn Iterator<Item = Result<FilePathType<'t, P>>> + Send + 't>> {
213    if sorted {
214        let vec =
215            file_path_types_vec(region, file_parent, opt, excludes, true)?;
216        Ok(Box::new(vec.into_iter().map(|v| Ok(v))))
217    } else {
218        Ok(Box::new(file_path_types_iter(
219            region,
220            file_parent,
221            opt,
222            excludes,
223        )?))
224    }
225}
226
227/// Descends into subdirs. You'll want to use `PathBuf` for `P` unless
228/// you have a need to store additional data in the parent nodes. When
229/// `sorted == true`, sorts every directory level individually,
230/// yielding sorted output at only the memory cost of the largest
231/// directory.
232pub fn recursive_file_path_types_iter<
233    'region,
234    't,
235    P: FileParent<'t> + Send + Sync,
236>(
237    region: &'t Region<'region, P>,
238    file_parent: RegionId<'region, P>,
239    opt: ItemOptions,
240    excludes: &'t Excludes,
241    sorted: bool,
242) -> impl Iterator<Item = Result<FilePathType<'t, P>>> + Send + 't {
243    Gen::new(|co| async move {
244        let orig_opt = opt;
245        let opt_with_dir = ItemOptions {
246            dirs: true,
247            ..orig_opt
248        };
249        let mut iter = match file_path_types_sortable_iter(
250            region,
251            file_parent,
252            opt_with_dir,
253            excludes,
254            sorted,
255        ) {
256            Ok(it) => it,
257            Err(e) => {
258                co.yield_(Err(e)).await;
259                return;
260            }
261        };
262        let mut stack = vec![];
263        loop {
264            while let Some(item) = iter.next() {
265                match item {
266                    Ok(item) => {
267                        if item.is_dir() {
268                            let file_parent =
269                                region.store(P::new(region, &item));
270                            if orig_opt.dirs {
271                                co.yield_(Ok(item.clone())).await;
272                            }
273                            stack.push(iter);
274                            match file_path_types_sortable_iter(
275                                region,
276                                file_parent,
277                                opt_with_dir,
278                                excludes,
279                                sorted,
280                            ) {
281                                Ok(new_iter) => iter = new_iter,
282                                Err(e) => {
283                                    co.yield_(Err(e)).await;
284                                    return;
285                                }
286                            }
287                        } else {
288                            co.yield_(Ok(item)).await;
289                        }
290                    }
291                    Err(e) => {
292                        co.yield_(Err(e)).await;
293                        return;
294                    }
295                }
296            }
297            if let Some(old_iter) = stack.pop() {
298                iter = old_iter;
299            } else {
300                return;
301            }
302        }
303    })
304    .into_iter()
305}
306
#[cfg(test)]
mod tests {
    use crate::io::excludes::empty_excludes;

    use super::*;

    /// Walks the `test/file_path_type/` fixture tree with different
    /// `ItemOptions` and compares the (sorted) set of yielded paths.
    #[test]
    fn t_recursive_file_path_types_iter() {
        let region: Region<PathBuf> = Region::new();
        let excludes = empty_excludes(true);

        // All paths yielded for `opt`, sorted here so that the
        // assertions do not depend on directory read order.
        let t = |opt: ItemOptions| -> Result<Vec<PathBuf>> {
            let root = region.store("test/file_path_type/".into());
            let walk = recursive_file_path_types_iter(
                &region,
                root,
                opt,
                &excludes,
                // XX try true, too?
                false,
            );
            let mut paths = Vec::new();
            for item in walk {
                paths.push(item?.to_path_buf(&region));
            }
            paths.sort();
            Ok(paths)
        };

        // Expected paths, given as string literals.
        let paths = |names: &[&str]| -> Vec<PathBuf> {
            names.iter().map(PathBuf::from).collect()
        };

        let dirs_and_other = ItemOptions {
            dirs: true,
            files: false,
            other: true,
        };
        assert_eq!(
            t(dirs_and_other).unwrap(),
            paths(&["test/file_path_type/bar", "test/file_path_type/foo"])
        );

        let everything = ItemOptions {
            dirs: true,
            files: true,
            other: true,
        };
        assert_eq!(
            t(everything).unwrap(),
            paths(&[
                "test/file_path_type/bar",
                "test/file_path_type/bar/c",
                "test/file_path_type/foo",
                "test/file_path_type/foo/a",
                "test/file_path_type/foo/b",
            ])
        );

        let files_and_other = ItemOptions {
            dirs: false,
            files: true,
            other: true,
        };
        assert_eq!(
            t(files_and_other).unwrap(),
            paths(&[
                "test/file_path_type/bar/c",
                "test/file_path_type/foo/a",
                "test/file_path_type/foo/b",
            ])
        );
    }
}
376}