evobench_tools/run/
versioned_dataset_dir.rs1use std::{
2 collections::HashMap,
3 ops::Deref,
4 path::{Path, PathBuf},
5 str::FromStr,
6 sync::{Arc, MutexGuard},
7};
8
9use anyhow::{Result, anyhow, bail};
10use cj_path_util::path_util::AppendToPath;
11use run_git::git::GitWorkingDir;
12
13use crate::{
14 ctx, debug,
15 git::{GitGraph, GitGraphData, GitHash},
16 serde_types::proper_filename::ProperFilename,
17 warn,
18};
19
20#[derive(Debug)]
22pub struct VersionedDatasetReferencesIndex {
23 commit_to_dirname: HashMap<GitHash, ProperFilename>,
24}
25
26impl VersionedDatasetReferencesIndex {
27 pub fn read(versioned_datasets_dir: &Path, git_working_dir: &GitWorkingDir) -> Result<Self> {
28 let mut commit_to_dirname = HashMap::new();
29 for entry in std::fs::read_dir(&versioned_datasets_dir)
30 .map_err(ctx!("can't open directory {versioned_datasets_dir:?}"))?
31 {
32 let entry = entry?;
33 if entry.path().is_file() {
36 continue;
37 }
38 if !entry.path().is_dir() {
39 bail!(
40 "non-dir non-file entry (broken symlink?) at {:?}",
41 entry.path()
42 )
43 }
44 if let Some(file_name) = entry.file_name().to_str() {
45 if let Some(commit) = git_working_dir.git_rev_parse(&file_name, true)? {
46 let commit = GitHash::from_str(&commit)?;
49 let folder_name: ProperFilename = file_name.parse().map_err(|e| {
50 anyhow!(
51 "versioned dataset dir {versioned_datasets_dir:?} \
52 entry {file_name:?}: {e:#}"
53 )
54 })?;
55 commit_to_dirname.insert(commit, folder_name);
56 } else {
57 warn!(
58 "file name of this path can't be found as Git revision: {:?}",
59 entry.path()
60 )
61 }
62 } else {
63 warn!(
64 "file name of this path can't be decoded as utf-8: {:?}",
65 entry.path()
66 )
67 }
68 }
69 Ok(Self { commit_to_dirname })
70 }
71}
72
73impl Deref for VersionedDatasetReferencesIndex {
74 type Target = HashMap<GitHash, ProperFilename>;
75
76 fn deref(&self) -> &Self::Target {
77 &self.commit_to_dirname
78 }
79}
80
81pub struct VersionedDatasetDir {
82 git_graph: Arc<GitGraph>,
83}
84
85impl VersionedDatasetDir {
86 pub fn new() -> Self {
87 Self {
88 git_graph: GitGraph::new(),
89 }
90 }
91
92 pub fn updated_git_graph<'s>(
93 &'s self,
94 git_working_dir: &'s GitWorkingDir,
95 commit_id: &'s GitHash,
96 ) -> Result<VersionedDatasetDirLock<'s>> {
97 let mut git_graph_data = self.git_graph.lock();
101 git_graph_data.add_history_from_dir_ref(
102 &*git_working_dir.working_dir_path,
104 &commit_id.to_string(),
106 )?;
107 Ok(VersionedDatasetDirLock {
108 git_working_dir,
109 commit_id,
110 git_graph_data,
111 })
112 }
113}
114
115pub struct VersionedDatasetDirLock<'s> {
116 git_working_dir: &'s GitWorkingDir,
117 commit_id: &'s GitHash,
118 git_graph_data: MutexGuard<'s, GitGraphData>,
119}
120
121impl<'s> VersionedDatasetDirLock<'s> {
122 pub fn dataset_dir_for_commit(
126 &self,
127 versioned_datasets_base_dir: &Path,
128 dataset_name: &str,
129 ) -> Result<PathBuf> {
130 let versioned_datasets_dir = versioned_datasets_base_dir.append(dataset_name);
131
132 let commit_to_dirname =
133 VersionedDatasetReferencesIndex::read(&versioned_datasets_dir, self.git_working_dir)?;
134 let commit_id = self.commit_id;
135 debug!(
136 "of the revisions {commit_to_dirname:?}, \
137 find the latest ancestor for commit {commit_id:?}"
138 );
139 let commit_id_id = self
140 .git_graph_data
141 .get_by_hash(commit_id)
142 .expect("always contained, as per documented usage contract");
143 let ancestor_or_self_id = self
144 .git_graph_data
145 .closest_matching_ancestor_of(commit_id_id, |id| {
146 let commit = self.git_graph_data.get(id).expect("internal consistency");
147 commit_to_dirname.contains_key(&commit.commit.commit_hash)
148 })?
149 .ok_or_else(|| {
150 anyhow!(
151 "can't find a dataset for commit {commit_id} in dir {versioned_datasets_dir:?} \
152 -- datasets should be in sub-directories of this dir, named after Git \
153 references (like tags or commits)"
154 )
155 })?;
156 let ancestor_or_self = &self
157 .git_graph_data
158 .get(ancestor_or_self_id)
159 .expect("internal consistency")
160 .commit
161 .commit_hash;
162 let chosen_dirname = commit_to_dirname
163 .get(ancestor_or_self)
164 .expect("outer internal consistency");
165 Ok(versioned_datasets_dir.append(chosen_dirname.as_str()))
166 }
167}