evobench_tools/run/working_directory.rs

1//! An abstraction for an *existing* directory, and one that should be
2//! usable (i.e. is worth trying to use).
3
4use std::{
5    fmt::Display,
6    fs::Permissions,
7    ops::{Deref, DerefMut},
8    os::unix::fs::PermissionsExt,
9    path::{Path, PathBuf},
10    sync::Arc,
11    time::{Duration, SystemTime},
12};
13
14use anyhow::{Result, anyhow, bail};
15use chj_unix_util::polling_signals::PollingSignalsSender;
16use run_git::{
17    git::{GitResetMode, GitWorkingDir, git_clone},
18    path_util::add_extension,
19};
20use serde::{Deserialize, Serialize};
21
22use crate::{
23    config_file::{load_ron_file, ron_to_file_pretty},
24    ctx, debug,
25    git::GitHash,
26    git_ext::MoreGitWorkingDir,
27    info,
28    run::working_directory_pool::{
29        WorkingDirectoryId, WorkingDirectoryPoolGuard, WorkingDirectoryPoolGuardMut,
30    },
31    serde_types::{date_and_time::DateTimeWithOffset, git_url::GitUrl},
32    utillib::arc::CloneArc,
33    warn,
34};
35
/// The name of the default upstream remote. This is just Git's
/// default name when cloning — we rely on that default and never
/// rename the remote.
pub const REMOTE_NAME: &str = "origin";
39
/// Configuration for the automatic cleanup (deletion) of old working
/// directories; the thresholds are combined in
/// `WorkingDirectory::needs_cleanup`.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[serde(deny_unknown_fields)]
#[serde(rename = "WorkingDirectoryAutoClean")]
pub struct WorkingDirectoryAutoCleanOpts {
    /// The minimum age a working directory should reach before
    /// possibly being deleted, in days (recommended: 3)
    pub min_age_days: u16,

    /// The minimum number of jobs that should be run in a working
    /// directory before that is possibly being deleted (recommended:
    /// 80).
    pub min_num_runs: usize,

    /// If true, directories are not deleted when any job for the same
    /// commit id is in the queue.  (Directories are deleted when they
    /// reach both the `min_age_days` and `min_num_runs` numbers, and
    /// this is false, or the current job just ended and no others for
    /// the commit id exist.)
    pub wait_until_commit_done: bool,
}
60
/// Empty option list for git invocations that need no extra flags.
const NO_OPTIONS: &[&str] = &[];
62
/// Lifecycle state of a working directory with regard to the commit
/// it currently has checked out; persisted as part of
/// `WorkingDirectoryStatus` in the `$n.status` files.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Only checked out for that commit
    CheckedOut,
    /// Currently running in process_working_directory
    Processing,
    /// Project benchmarking gave error for that commit (means working dir is set aside)
    Error,
    /// Project benchmarking ran through for that commit
    Finished,
    /// Marked for examination, probably after an Error happened and
    /// it was decided to keep the directory around
    Examination,
}
77
78impl Status {
79    /// How well the dir is usable for a given commit id
80    fn score(self) -> u32 {
81        match self {
82            Status::CheckedOut => 1,
83            Status::Processing => 2,
84            Status::Error => 3,
85            Status::Finished => 4,
86            Status::Examination => 5,
87        }
88    }
89
90    /// Whether the daemon is allowed to use the dir
91    pub fn can_be_used_for_jobs(self) -> bool {
92        match self {
93            Status::CheckedOut | Status::Processing | Status::Finished => true,
94            Status::Error | Status::Examination => false,
95        }
96    }
97
98    pub const MAX_STR_LEN: usize = 11;
99
100    pub fn as_str(self) -> &'static str {
101        match self {
102            Status::CheckedOut => "checked-out",
103            Status::Processing => "processing",
104            Status::Error => "error",
105            Status::Finished => "finished",
106            Status::Examination => "examination",
107        }
108    }
109}
110
impl PartialOrd for Status {
    // Canonical delegation to the total order defined by `Ord` below.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
116
impl Ord for Status {
    // Order by usability score: a "greater" status is a better
    // candidate for reuse (see `Status::score`).
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.score().cmp(&other.score())
    }
}
122
123impl Display for Status {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        f.write_str(self.as_str())
126    }
127}
128
/// Stored in `$n.status` files for each working directory
#[derive(Debug, Serialize, Deserialize)]
pub struct WorkingDirectoryStatus {
    /// When the working directory was created (cloned).
    pub creation_timestamp: DateTimeWithOffset,
    /// How many jobs have run in this directory so far; incremented
    /// whenever the status transitions to `Status::Processing`.
    pub num_runs: usize,
    /// Current lifecycle state of the directory.
    pub status: Status,
}
136
impl WorkingDirectoryStatus {
    /// A fresh status for a just-created working directory: created
    /// now, never run, freshly checked out.
    fn new() -> Self {
        Self {
            creation_timestamp: DateTimeWithOffset::now(None),
            num_runs: 0,
            status: Status::CheckedOut,
        }
    }
}
146
// This is pretty much like WorkingDirectoryPool has a separate
// WorkingDirectoryPoolBaseDir, right? (Need to store Arc<PathBuf>
// since that's what `run-git` currently uses, should change that.)
/// A path to a working directory. Has methods that only need a path,
/// nothing else.
#[derive(Clone)]
pub struct WorkingDirectoryPath(Arc<PathBuf>);
154
// Cheap wrap of an already-shared path.
impl From<Arc<PathBuf>> for WorkingDirectoryPath {
    fn from(value: Arc<PathBuf>) -> Self {
        Self(value)
    }
}
160
// Cheap unwrap back to the shared path (no copy).
impl From<WorkingDirectoryPath> for Arc<PathBuf> {
    fn from(value: WorkingDirectoryPath) -> Self {
        value.0
    }
}
166
167impl From<WorkingDirectoryPath> for PathBuf {
168    fn from(value: WorkingDirectoryPath) -> Self {
169        match Arc::try_unwrap(value.0) {
170            Ok(v) => v,
171            Err(value) => value.as_path().to_owned(),
172        }
173    }
174}
175
176impl WorkingDirectoryPath {
177    const STANDARD_LOG_EXTENSION_BASE: &str = "output_of_benchmarking_command_at_";
178
179    /// The path to which the stdout and stderr of the target process
180    /// is logged, including a header with the serialized key data
181    /// (before it is moved/compressed to the output directory when
182    /// the run was successful)
183    pub fn standard_log_path(&self, timestamp: &DateTimeWithOffset) -> Result<PathBuf> {
184        let path = &self.0;
185        add_extension(
186            &**path,
187            format!("{}{timestamp}", Self::STANDARD_LOG_EXTENSION_BASE),
188        )
189        .ok_or_else(|| anyhow!("can't add extension to path {path:?}"))
190    }
191
192    /// Originally thought `id` is a pool matter only, but now need it
193    /// to filter for standard_log paths. Leaving id as string,
194    /// though.)
195    pub fn parent_path_and_id(&self) -> Result<(&Path, &str)> {
196        let p = &self.0;
197        let parent = p
198            .parent()
199            .ok_or_else(|| anyhow!("working directory path {p:?} doesn't have parent path"))?;
200        let file_name = p
201            .file_name()
202            .ok_or_else(|| anyhow!("working directory path {p:?} doesn't have file_name"))?;
203        let file_name = file_name.to_str().ok_or_else(|| {
204            anyhow!("working directory path {p:?} does not have a file name in unicode")
205        })?;
206        Ok((parent, file_name))
207    }
208
209    /// All stdout log files that were written for this working
210    /// directory: path including file name, just the
211    /// timestamp. Sorted by timestamp, newest last.
212    pub fn standard_log_paths(&self) -> Result<Vec<(PathBuf, String)>> {
213        let (parent_path, id_str) = self.parent_path_and_id()?;
214        let filename_prefix = format!("{id_str}.{}", Self::STANDARD_LOG_EXTENSION_BASE,);
215
216        (|| -> Result<Vec<(PathBuf, String)>> {
217            let mut paths = vec![];
218            for item in std::fs::read_dir(&parent_path)? {
219                let item = item?;
220                if let Ok(file_name) = item.file_name().into_string() {
221                    if let Some(timestamp) = file_name.strip_prefix(&filename_prefix) {
222                        paths.push((item.path(), timestamp.to_owned()));
223                    }
224                }
225            }
226            paths.sort_by(|a, b| a.1.cmp(&b.1));
227            Ok(paths)
228        })()
229        .map_err(ctx!(
230            "opening working directory parent dir {parent_path:?} for reading"
231        ))
232    }
233
234    pub fn last_standard_log_path(&self) -> Result<Option<(PathBuf, String)>> {
235        Ok(self.standard_log_paths()?.pop())
236    }
237
238    pub fn noncached_commit(&self) -> Result<GitHash> {
239        let git_working_dir = GitWorkingDir {
240            working_dir_path: self.0.clone_arc(),
241        };
242        git_working_dir.get_head_commit_id()?.parse()
243    }
244}
245
/// An existing working directory on disk, together with its cached
/// commit and its persisted `$n.status` state.
#[derive(Debug)]
pub struct WorkingDirectory {
    pub git_working_dir: GitWorkingDir,
    /// Possibly initialized lazily via `commit()` accessor
    pub commit: Option<GitHash>,
    pub working_directory_status: WorkingDirectoryStatus,
    /// True when the in-memory status has not been written to the
    /// `$n.status` file yet (e.g. the file was missing on open).
    working_directory_status_needs_saving: bool,
    /// last use time: mtime of the .status file
    pub last_use: SystemTime,
    // To signal changes in working directory status files. Note the
    // warning in WorkingDirectoryPool!
    signal_change: Option<PollingSignalsSender>,
}
259
/// Shared access to a `WorkingDirectory` that is only handed out
/// while the pool lock guard is held.
pub struct WorkingDirectoryWithPoolLock<'guard> {
    // Don't make it plain `pub` as then could be constructed without
    // requiring going through the guard.
    pub(crate) wd: &'guard WorkingDirectory,
}
265
impl<'guard> WorkingDirectoryWithPoolLock<'guard> {
    /// Unwrap to the plain shared reference (losing the marker that
    /// the pool lock was required to obtain it).
    pub fn into_inner(self) -> &'guard WorkingDirectory {
        self.wd
    }
}
271
// Transparent read access to the wrapped working directory.
impl<'guard> Deref for WorkingDirectoryWithPoolLock<'guard> {
    type Target = WorkingDirectory;

    fn deref(&self) -> &Self::Target {
        self.wd
    }
}
279
/// Does not own the lock! See `WorkingDirectoryWithPoolMut` for that.
pub struct WorkingDirectoryWithPoolLockMut<'guard> {
    // Don't make the field plain `pub` as then this could be
    // constructed without requiring going through the guard.
    pub(crate) wd: &'guard mut WorkingDirectory,
}
286
// Transparent read access to the wrapped working directory.
impl<'guard> Deref for WorkingDirectoryWithPoolLockMut<'guard> {
    type Target = WorkingDirectory;

    fn deref(&self) -> &Self::Target {
        self.wd
    }
}
294
// Transparent mutable access to the wrapped working directory.
impl<'guard> DerefMut for WorkingDirectoryWithPoolLockMut<'guard> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.wd
    }
}
300
/// Owns the lock
pub struct WorkingDirectoryWithPoolMut<'pool> {
    pub(crate) guard: WorkingDirectoryPoolGuardMut<'pool>,
    /// Identifies the working directory within the pool; looked up
    /// lazily in `get` / `into_inner`.
    pub working_directory_id: WorkingDirectoryId,
}
306
impl<'pool> WorkingDirectoryWithPoolMut<'pool> {
    /// Get the working directory; does the lookup at this time, hence
    /// Option
    pub fn get<'s>(&'s mut self) -> Option<WorkingDirectoryWithPoolLockMut<'s>> {
        // Destructure so the mutable borrow of `guard` and the read
        // of the id are disjoint borrows of `self`.
        let Self {
            guard,
            working_directory_id,
        } = self;
        Some(WorkingDirectoryWithPoolLockMut {
            wd: guard
                .pool
                .get_working_directory_mut(*working_directory_id)?,
        })
    }

    /// Releases the lock. Retrieves the working directory at this
    /// time, hence Option.
    pub fn into_inner(self) -> Option<&'pool mut WorkingDirectory> {
        let Self {
            guard,
            working_directory_id,
        } = self;
        // Destructure the guard to move `pool` out; `_lock` is
        // dropped here, releasing the lock while the `'pool`
        // reference into the pool remains valid.
        let WorkingDirectoryPoolGuardMut { _lock, pool } = guard;
        pool.get_working_directory_mut(working_directory_id)
    }
}
333
impl WorkingDirectory {
    /// The path of the `$n.status` sidecar file for the working
    /// directory at `path`: the directory path with a `.status`
    /// extension appended.
    pub fn status_path_from_working_dir_path(path: &Path) -> Result<PathBuf> {
        add_extension(&path, "status")
            .ok_or_else(|| anyhow!("can't add extension to path {path:?}"))
    }
    /// The `$n.status` file path for this working directory.
    fn status_path(&self) -> Result<PathBuf> {
        Self::status_path_from_working_dir_path(self.git_working_dir.working_dir_path_ref())
    }

    /// To get access to methods that don't need a full
    /// WorkingDirectory, just its path.
    pub fn working_directory_path(&self) -> WorkingDirectoryPath {
        WorkingDirectoryPath(self.git_working_dir.working_dir_path_arc())
    }

    /// Open an existing working directory. Its default upstream
    /// (origin) is checked against `url` and changed to `url` if
    /// different; the idea here is that while the upstream URL may
    /// change (perhaps even semi-often), a configuration is always
    /// about the same target project, hence they share commits, hence
    /// deleting and re-cloning the working dir is not necessary or
    /// desired, just changing that url so that the newest changes can
    /// be retrieved. `omit_check` disables that check. (For
    /// `signal_change`, see the docs on the field.)
    pub fn open<'pool>(
        path: PathBuf,
        url: &GitUrl,
        guard: &WorkingDirectoryPoolGuard<'pool>,
        omit_check: bool,
        signal_change: Option<PollingSignalsSender>,
    ) -> Result<Self> {
        // let quiet = false;

        let working_directory_status_needs_saving;

        // Load the persisted status; a missing file is tolerated and
        // replaced with defaults, any other I/O error aborts.
        let status_path = Self::status_path_from_working_dir_path(&path)?;
        let (mtime, working_directory_status);
        match status_path.metadata() {
            Ok(metadata) => {
                // `last_use` is defined as the mtime of the status file.
                mtime = metadata.modified()?;
                working_directory_status = load_ron_file(&status_path)?;
                working_directory_status_needs_saving = false;
            }
            Err(e) => {
                match e.kind() {
                    std::io::ErrorKind::NotFound => {
                        info!(
                            "note: missing working directory status file {status_path:?}, \
                             creating from defaults"
                        );
                        mtime = SystemTime::now();
                        working_directory_status = WorkingDirectoryStatus::new();
                        working_directory_status_needs_saving = true;
                    }
                    _ => {
                        return Err(e).map_err(ctx!(
                            "checking working directory status file path {status_path:?}"
                        ));
                    }
                };
            }
        }

        let git_working_dir = GitWorkingDir::from(path);
        // Re-borrow the path (it was moved into `git_working_dir`).
        let path = git_working_dir.working_dir_path_ref();

        // Check that the url is the same
        if !omit_check {
            let current_url = git_working_dir.get_url(REMOTE_NAME)?;
            if current_url != url.as_str() {
                warn!(
                    "the working directory at {path:?} has an {REMOTE_NAME:?} url != {url:?}: \
                     {current_url:?} -- setting it to the expected value"
                );
                git_working_dir.set_url(REMOTE_NAME, url)?;
            }
        }

        let status = working_directory_status.status;

        let mut slf = Self {
            git_working_dir,
            commit: None,
            working_directory_status,
            working_directory_status_needs_saving,
            last_use: mtime,
            signal_change,
        };
        // Re-save the status (a no-op change status-wise: `status`
        // was just read from `working_directory_status`) so that a
        // freshly-defaulted status file gets written to disk.
        let mut slf_lck = guard.locked_working_directory_mut(&mut slf);
        // XX chaos: Do not change the status if it already
        // exists. Does this even work?
        slf_lck.set_and_save_status(status)?;
        Ok(slf)
    }

    /// Clone `url` into `base_dir`/`dir_file_name` and create a fresh
    /// working directory with a new (unsaved-then-saved) status.
    pub fn clone_repo<'pool>(
        base_dir: &Path,
        dir_file_name: &str,
        url: &GitUrl,
        guard: &WorkingDirectoryPoolGuard<'pool>,
        signal_change: Option<PollingSignalsSender>,
    ) -> Result<Self> {
        let quiet = false;
        let git_working_dir = git_clone(&base_dir, [], url.as_str(), dir_file_name, quiet)?;
        let commit: GitHash = git_working_dir.get_head_commit_id()?.parse()?;
        let status = WorkingDirectoryStatus::new();
        // `last_use` mirrors the creation time for a fresh clone.
        let mtime = status.creation_timestamp.to_systemtime();
        info!("clone_repo({base_dir:?}, {dir_file_name:?}, {url}) succeeded");
        let mut slf = Self {
            git_working_dir,
            commit: Some(commit),
            working_directory_status: status,
            working_directory_status_needs_saving: true,
            last_use: mtime,
            signal_change,
        };
        // `needs_saving` is true, so this writes the initial status
        // file even though the status value does not change.
        let mut slf_lck = guard.locked_working_directory_mut(&mut slf);
        slf_lck.set_and_save_status(Status::CheckedOut)?;
        Ok(slf)
    }

    /// Whether this working directory qualifies for auto-cleanup per
    /// `opts` (see `WorkingDirectoryAutoCleanOpts`); `None` opts
    /// means cleanup is disabled.
    pub fn needs_cleanup(
        &self,
        opts: Option<&WorkingDirectoryAutoCleanOpts>,
        have_other_jobs_for_same_commit: Option<&dyn Fn() -> bool>,
    ) -> Result<bool> {
        if let Some(WorkingDirectoryAutoCleanOpts {
            min_age_days,
            min_num_runs,
            wait_until_commit_done,
        }) = opts
        {
            let is_old_enough: bool = {
                let min_age_days: u64 = (*min_age_days).into();
                let min_age = Duration::from_secs(24 * 3600 * min_age_days);
                let now = SystemTime::now();
                let creation_time: SystemTime = self
                    .working_directory_status
                    .creation_timestamp
                    .to_systemtime();
                // Errors if the creation timestamp is in the future
                // (clock skew).
                let age = now.duration_since(creation_time).map_err(ctx!(
                    "calculating age for working directory {:?}",
                    self.git_working_dir.working_dir_path_ref()
                ))?;
                age >= min_age
            };
            let is_used_enough: bool = self.working_directory_status.num_runs >= *min_num_runs;
            // NOTE(review): per the `wait_until_commit_done` docs,
            // cleanup should be *blocked* while other jobs for the
            // same commit exist, yet this expression permits cleanup
            // exactly when `have_other_jobs_for_same_commit()`
            // returns true — the polarity looks inverted relative to
            // the doc. Confirm the callback's actual semantics at the
            // call sites (polling_pool.rs) before relying on this.
            Ok(is_old_enough
                && is_used_enough
                && ((!*wait_until_commit_done) || {
                    if let Some(have_other_jobs_for_same_commit) = have_other_jobs_for_same_commit {
                        have_other_jobs_for_same_commit()
                    } else {
                        // Could actually short-cut the calls from
                        // polling_pool.rs to false here. But making those
                        // configurable may still be good.
                        true
                    }
                }))
        } else {
            info!(
                "never cleaning up working directories since there is no \
                 `auto_clean` configuration"
            );
            Ok(false)
        }
    }

    /// Unconditionally run `git fetch --tags` in the working dir. Is
    /// called by `checkout` as needed. If `commit_id` is given, it is
    /// fetched explicitly
    pub fn fetch(&self, commit_id: Option<&GitHash>) -> Result<FetchedTags> {
        let git_working_dir = &self.git_working_dir;

        // Fetching tags in case `dataset_dir_for_commit` is
        // used.
        let fetch_all_tags = true;

        // `tmp` outlives the `if` so the slice borrowed from it does too.
        let tmp;
        let references = if let Some(commit_id) = commit_id {
            tmp = [commit_id.to_reference()];
            tmp.as_slice()
        } else {
            &[]
        };

        // Note: this does not update branches, right? But
        // branch names should never be used for anything, OK? XX
        // document?  or make the method fetch branches
        git_working_dir.fetch_references(REMOTE_NAME, fetch_all_tags, references, true)?;
        // NOTE(review): the message says "checkout(...)" although we
        // are in `fetch` — presumably because the main caller is
        // `checkout`; it can be misleading when `fetch` is called
        // directly.
        info!(
            "checkout({:?}, {commit_id:?}): ran fetch_references",
            git_working_dir.working_dir_path_ref()
        );

        Ok(FetchedTags::Yes)
    }
}
532
/// Result marker: whether a `fetch` actually ran (and hence tags were
/// updated) during an operation.
#[derive(Clone, Debug, PartialEq, Eq)]
#[must_use]
pub enum FetchedTags {
    No,
    Yes,
}
539
/// Policy for `checkout`: fetch only when the requested commit is
/// missing locally, or always.
#[derive(Debug, PartialEq, Eq)]
pub enum FetchTags {
    WhenMissingCommit,
    Always,
}
545
impl<'guard> WorkingDirectoryWithPoolLockMut<'guard> {
    /// Retrieve commit via git if not already cached
    pub fn commit(&mut self) -> Result<&GitHash> {
        // The `is_some` + `expect` shape (instead of `if let`) keeps
        // the borrow of `self.commit` out of the way of the later
        // `self.working_directory_path()` call.
        if self.commit.is_some() {
            Ok(self.commit.as_ref().expect("just checked"))
        } else {
            let commit = self.working_directory_path().noncached_commit()?;
            debug!(
                "set commit field of entry for WorkingDirectory {:?} to {commit}",
                self.wd.git_working_dir.working_dir_path_ref()
            );
            self.commit = Some(commit);
            Ok(self.commit.as_ref().expect("just set"))
        }
    }

    /// Checks and is a no-op if already on the commit.
    pub fn checkout(&mut self, commit: GitHash, fetch_tags: FetchTags) -> Result<FetchedTags> {
        let commit_str = commit.to_string();
        let quiet = false;
        let current_commit = self.wd.git_working_dir.get_head_commit_id()?;

        let fetch_tags_always = match fetch_tags {
            FetchTags::WhenMissingCommit => false,
            FetchTags::Always => true,
        };

        let ran_fetch;
        if current_commit == commit_str {
            // Already on the requested commit; verify that the cached
            // `commit` field agrees with what git reports on disk.
            if self.commit()? != &commit {
                bail!("consistency failure: dir on disk has different commit id from obj")
            }
            if fetch_tags_always {
                ran_fetch = self.fetch(Some(&commit))?;
            } else {
                ran_fetch = FetchedTags::No;
            }
        } else {
            let git_working_dir = &self.wd.git_working_dir;

            // Skip the fetch when the commit is already present
            // locally (unless tags must always be fetched).
            if (!fetch_tags_always) && git_working_dir.contains_reference(&commit_str)? {
                ran_fetch = FetchedTags::No;
            } else {
                ran_fetch = self.fetch(Some(&commit))?;
            }

            // First stash, merge --abort, cherry-pick --abort, and all
            // that jazz? No, have such a dir just go set aside with error
            // for manual fixing/removal.
            git_working_dir.git_reset(GitResetMode::Hard, NO_OPTIONS, &commit_str, quiet)?;
            info!(
                "checkout({:?}, {commit}): ran git reset --hard",
                self.wd.git_working_dir.working_dir_path_ref()
            );
            self.wd.commit = Some(commit);
            self.set_and_save_status(Status::CheckedOut)?;
        }
        Ok(ran_fetch)
    }

    /// Set status to `status`. Also increments the run count if the
    /// status changed to Status::Processing, and (re-)saves
    /// `$n.status` file if needed.
    pub fn set_and_save_status(&mut self, status: Status) -> Result<()> {
        debug!(
            "{:?} set_and_save_status({status:?})",
            self.wd.git_working_dir
        );
        let old_status = self.wd.working_directory_status.status;
        self.wd.working_directory_status.status = status;
        // Save on any transition, or when the file was never written
        // (e.g. it was missing when the directory was opened).
        let needs_saving;
        if old_status != status {
            needs_saving = true;
            if status == Status::Processing {
                self.wd.working_directory_status.num_runs += 1;
            }
        } else {
            needs_saving = self.wd.working_directory_status_needs_saving;
        }
        if needs_saving {
            let working_directory_status = &self.wd.working_directory_status;
            let path = self.wd.status_path()?;
            ron_to_file_pretty(working_directory_status, &path, false, None)?;
            if !working_directory_status.status.can_be_used_for_jobs() {
                // Mis-use executable bit to easily see error status files
                // in dir listings on the command line.
                std::fs::set_permissions(&path, Permissions::from_mode(0o755))
                    .map_err(ctx!("setting executable permission on file {path:?}"))?;
            }
            // Wake up any watcher of the status files.
            if let Some(signal_change) = &self.signal_change {
                signal_change.send_signal();
            }
            debug!(
                "{:?} set_and_save_status({status:?}): file saved",
                self.wd.git_working_dir
            );
        }
        self.wd.working_directory_status_needs_saving = false;
        Ok(())
    }
}