evobench_tools/run/
versioned_dataset_dir.rs

1use std::{
2    collections::HashMap,
3    ops::Deref,
4    path::{Path, PathBuf},
5    str::FromStr,
6    sync::{Arc, MutexGuard},
7};
8
9use anyhow::{Result, anyhow, bail};
10use cj_path_util::path_util::AppendToPath;
11use run_git::git::GitWorkingDir;
12
13use crate::{
14    ctx, debug,
15    git::{GitGraph, GitGraphData, GitHash},
16    serde_types::proper_filename::ProperFilename,
17    warn,
18};
19
20/// Index of all versioned entries
21#[derive(Debug)]
22pub struct VersionedDatasetReferencesIndex {
23    commit_to_dirname: HashMap<GitHash, ProperFilename>,
24}
25
26impl VersionedDatasetReferencesIndex {
27    pub fn read(versioned_datasets_dir: &Path, git_working_dir: &GitWorkingDir) -> Result<Self> {
28        let mut commit_to_dirname = HashMap::new();
29        for entry in std::fs::read_dir(&versioned_datasets_dir)
30            .map_err(ctx!("can't open directory {versioned_datasets_dir:?}"))?
31        {
32            let entry = entry?;
33            // Ignore non-dir entries? Could allow for README
34            // files or so.
35            if entry.path().is_file() {
36                continue;
37            }
38            if !entry.path().is_dir() {
39                bail!(
40                    "non-dir non-file entry (broken symlink?) at {:?}",
41                    entry.path()
42                )
43            }
44            if let Some(file_name) = entry.file_name().to_str() {
45                if let Some(commit) = git_working_dir.git_rev_parse(&file_name, true)? {
46                    // XX can git_rev_parse not return GitHash? Wrap
47                    // it when called with true?
48                    let commit = GitHash::from_str(&commit)?;
49                    let folder_name: ProperFilename = file_name.parse().map_err(|e| {
50                        anyhow!(
51                            "versioned dataset dir {versioned_datasets_dir:?} \
52                             entry {file_name:?}: {e:#}"
53                        )
54                    })?;
55                    commit_to_dirname.insert(commit, folder_name);
56                } else {
57                    warn!(
58                        "file name of this path can't be found as Git revision: {:?}",
59                        entry.path()
60                    )
61                }
62            } else {
63                warn!(
64                    "file name of this path can't be decoded as utf-8: {:?}",
65                    entry.path()
66                )
67            }
68        }
69        Ok(Self { commit_to_dirname })
70    }
71}
72
73impl Deref for VersionedDatasetReferencesIndex {
74    type Target = HashMap<GitHash, ProperFilename>;
75
76    fn deref(&self) -> &Self::Target {
77        &self.commit_to_dirname
78    }
79}
80
81pub struct VersionedDatasetDir {
82    git_graph: Arc<GitGraph>,
83}
84
85impl VersionedDatasetDir {
86    pub fn new() -> Self {
87        Self {
88            git_graph: GitGraph::new(),
89        }
90    }
91
92    pub fn updated_git_graph<'s>(
93        &'s self,
94        git_working_dir: &'s GitWorkingDir,
95        commit_id: &'s GitHash,
96    ) -> Result<VersionedDatasetDirLock<'s>> {
97        // Update graphdata with the (no need to update
98        // tag mappings, they are read directly from Git
99        // in `dataset_dir_for_commit`)
100        let mut git_graph_data = self.git_graph.lock();
101        git_graph_data.add_history_from_dir_ref(
102            // XX should pass &git_working_dir instead
103            &*git_working_dir.working_dir_path,
104            // XX should pass &GitHash instead
105            &commit_id.to_string(),
106        )?;
107        Ok(VersionedDatasetDirLock {
108            git_working_dir,
109            commit_id,
110            git_graph_data,
111        })
112    }
113}
114
115pub struct VersionedDatasetDirLock<'s> {
116    git_working_dir: &'s GitWorkingDir,
117    commit_id: &'s GitHash,
118    git_graph_data: MutexGuard<'s, GitGraphData>,
119}
120
121impl<'s> VersionedDatasetDirLock<'s> {
122    /// Must be up to date and include all the possibly used
123    /// references and the commit! Those conditions are ensured by
124    /// `working_directory.checkout()`.
125    pub fn dataset_dir_for_commit(
126        &self,
127        versioned_datasets_base_dir: &Path,
128        dataset_name: &str,
129    ) -> Result<PathBuf> {
130        let versioned_datasets_dir = versioned_datasets_base_dir.append(dataset_name);
131
132        let commit_to_dirname =
133            VersionedDatasetReferencesIndex::read(&versioned_datasets_dir, self.git_working_dir)?;
134        let commit_id = self.commit_id;
135        debug!(
136            "of the revisions {commit_to_dirname:?}, \
137             find the latest ancestor for commit {commit_id:?}"
138        );
139        let commit_id_id = self
140            .git_graph_data
141            .get_by_hash(commit_id)
142            .expect("always contained, as per documented usage contract");
143        let ancestor_or_self_id = self
144            .git_graph_data
145            .closest_matching_ancestor_of(commit_id_id, |id| {
146                let commit = self.git_graph_data.get(id).expect("internal consistency");
147                commit_to_dirname.contains_key(&commit.commit.commit_hash)
148            })?
149            .ok_or_else(|| {
150                anyhow!(
151                    "can't find a dataset for commit {commit_id} in dir {versioned_datasets_dir:?} \
152                     -- datasets should be in sub-directories of this dir, named after Git \
153                     references (like tags or commits)"
154                )
155            })?;
156        let ancestor_or_self = &self
157            .git_graph_data
158            .get(ancestor_or_self_id)
159            .expect("internal consistency")
160            .commit
161            .commit_hash;
162        let chosen_dirname = commit_to_dirname
163            .get(ancestor_or_self)
164            .expect("outer internal consistency");
165        Ok(versioned_datasets_dir.append(chosen_dirname.as_str()))
166    }
167}