evobench_tools/run/output_directory/
post_process.rs

1//! Post-processing of the output files from a benchmark run, or a set
2//! of benchmark runs belonging to the same 'key'.
3
4use std::{
5    collections::{HashMap, hash_map::Entry},
6    ffi::OsString,
7    path::{Path, PathBuf},
8    process::Command,
9    sync::Arc,
10};
11
12use anyhow::{Result, bail};
13use cj_path_util::{path_util::AppendToPath, unix::polyfill::add_extension};
14
15use crate::{
16    ctx, info,
17    io_utils::zstd_file::compress_file,
18    run::{
19        command_log_file::CommandLogFile,
20        config::{RunConfig, ScheduleCondition},
21        output_directory::structure::{KeyDir, RunDir, SubDirs, ToPath},
22    },
23    serde_types::{proper_dirname::ProperDirname, proper_filename::ProperFilename},
24    utillib::logging::{LogLevel, log_level},
25};
26
27/// `target_path` must include the `.zstd` extension. XX why does this
28/// not always do .tmp and then rename, for safety? Rather, have an
29/// `omit_rename` argument and then leave .tmp suffix in place?
30pub fn compress_file_as(
31    source_path: &Path,
32    target_path: PathBuf,
33    add_tmp_suffix: bool,
34) -> Result<PathBuf> {
35    let actual_target_path = if add_tmp_suffix {
36        add_extension(&target_path, "tmp").expect("got filename")
37    } else {
38        target_path
39    };
40    compress_file(
41        source_path,
42        &actual_target_path,
43        // be quiet when:
44        log_level() < LogLevel::Info,
45    )?;
46    // Do *not* remove the source file here as
47    // TemporaryFile::drop will do it.
48    Ok(actual_target_path)
49}
50
51// XX here, *too*, do capture for consistency? XX: could do "nice" scheduling here.
52pub fn evobench_eval(args: &[OsString]) -> Result<()> {
53    let prog = "evobench-eval";
54    let mut c = Command::new(prog);
55    c.args(args);
56    let mut child = c.spawn().map_err(ctx!("spawning command {c:?}"))?;
57    let status = child.wait()?;
58    if status.success() {
59        Ok(())
60    } else {
61        bail!("running {prog:?} with args {args:?}: {status}")
62    }
63}
64
65fn generate_summary(
66    key_dir: &Path,
67    job_output_dirs: &[RunDir],
68    selector: &str,        // "avg" or so
69    target_type_opt: &str, // "--excel" or so
70    file_base_name: &str,
71) -> Result<()> {
72    let mut args: Vec<OsString> = Vec::new();
73    args.push("summary".into());
74
75    args.push("--summary-field".into()); // XXX *is* right one right? OPEN
76    args.push(selector.into());
77
78    args.push(target_type_opt.into());
79    args.push(key_dir.append(file_base_name).into());
80
81    for job_output_dir in job_output_dirs {
82        let evobench_log = job_output_dir.to_path().append("evobench.log.zstd");
83        if std::fs::exists(&evobench_log).map_err(ctx!("checking path {evobench_log:?}"))? {
84            args.push(evobench_log.into());
85        } else {
86            info!("missing file {evobench_log:?}, empty dir?");
87        }
88    }
89
90    evobench_eval(&args)?;
91
92    Ok(())
93}
94
/// The summary files to generate, as `(selector, target option, file
/// name suffix)` tuples: the selector also becomes the start of the
/// output file name, the target option selects the output kind, and
/// the suffix is appended to the end of the file name.
const SUMMARIES: &[(&str, &str, &str)] = &[
    // Flame output of the sums; no file name suffix
    ("sum", "--flame", ""),
    // Excel file of the averages
    ("avg", "--excel", ".xlsx"),
    // Excel file of the sums
    ("sum", "--excel", ".xlsx"),
];
100
101/// Situation `None` means across all outputs; otherwise "night" etc.
102pub fn generate_all_summaries_for_situation(
103    situation: Option<&ProperFilename>,
104    key_dir: &Path,
105    job_output_dirs: &[RunDir],
106) -> Result<()> {
107    for (selector, target, suffix) in SUMMARIES {
108        let mut basename = format!("{selector}-summary");
109        if let Some(situation) = situation {
110            basename = format!("{basename}-{}", situation.as_str());
111        }
112        basename.push_str(suffix);
113        generate_summary(key_dir, job_output_dirs, selector, target, &basename)?;
114    }
115    Ok(())
116}
117
118impl RunDir {
119    /// Produce the "single" extract files, as well as other
120    /// configured derivatives. After the standard "single" extracts
121    /// succeeded, `evaluating_benchmark_file_succeeded` is run; it
122    /// should remove the file at `evobench_log_path` if this is the
123    /// initial run and `evobench_log_path` pointed to e.g. a
124    /// tmpfs. Pass a no-op if calling later on. If
125    /// `evobench_log_path` is None, then the standard location is
126    /// used.  If `no_summary_stats` is true, skips Excel and
127    /// flamegraph generation for the evobench.log data (other
128    /// post-processing is still done, i.e. configured extractions)
129    pub fn post_process_single(
130        &self,
131        evobench_log_path: Option<&Path>,
132        evaluating_benchmark_file_succeeded: impl FnOnce() -> Result<()>,
133        target_name: &ProperDirname,
134        standard_log_path: &Path,
135        run_config: &RunConfig,
136        no_stats: bool,
137    ) -> Result<()> {
138        info!("evaluating benchmark file");
139
140        let default_path_;
141        let evobench_log_path = if let Some(path) = evobench_log_path {
142            path
143        } else {
144            default_path_ = self.evobench_log_path();
145            &default_path_
146        };
147
148        if !no_stats {
149            // Doing this *before* possibly renaming the file via
150            // `evaluating_benchmark_file_succeeded`, as a way to ensure
151            // that no invalid files end up in the results pool!
152
153            {
154                let mut args = vec!["single".into(), evobench_log_path.into()];
155                if run_config.eval_settings.show_thread_number {
156                    args.push("--show-thread-number".into());
157                }
158                args.push("--excel".into());
159                args.push(self.append_str("single.xlsx")?.into());
160
161                evobench_eval(&args)?;
162            }
163
164            // It's a bit inefficient to read the $EVOBENCH_LOG twice, but
165            // currently can't change the options (--show-thread-number)
166            // without a separate run. (Will be low cost once caching is
167            // done.)
168            evobench_eval(&vec![
169                "single".into(),
170                evobench_log_path.into(),
171                "--flame".into(),
172                self.append_str("single")?.into(),
173            ])?;
174        }
175
176        evaluating_benchmark_file_succeeded()?;
177        // The above may have unlinked evobench_log_path, thus prevent further use:
178        #[allow(unused)]
179        let evobench_log_path = ();
180
181        // Find the `LogExtract`s for the `target_name`
182        if let Some(target) = run_config.targets.get(target_name) {
183            if let Some(log_extracts) = &target.log_extracts {
184                if !log_extracts.is_empty() {
185                    info!("performing log extracts");
186
187                    let command_log_file = CommandLogFile::from(standard_log_path);
188                    let command_log = command_log_file.command_log()?;
189
190                    for log_extract in log_extracts {
191                        log_extract.extract_seconds_from(&command_log, self.to_path())?;
192                    }
193                }
194            } else {
195                info!("no log extracts are configured");
196            }
197        } else {
198            info!(
199                "haven't found target {target_name:?}, old job before \
200                 configuration change?"
201            );
202        }
203
204        Ok(())
205    }
206}
207
208impl KeyDir {
209    /// If `no_summary_stats` is true, skips Excel and flamegraph
210    /// generation for the evobench.log data (which currently is all
211    /// that this method is doing, but in the future it might do stats
212    /// of other data)
213    pub fn generate_summaries_for_key_dir(self: &Arc<Self>, no_summary_stats: bool) -> Result<()> {
214        let key_dir = self.to_path();
215        info!("(re-)evaluating the summary files across all results in key dir {key_dir:?}");
216
217        let run_dirs = self.sub_dirs()?.collect::<Result<Vec<_>>>()?;
218
219        if !no_summary_stats {
220            generate_all_summaries_for_situation(None, key_dir, &run_dirs)?;
221        }
222
223        {
224            let mut job_output_dirs_by_situation: HashMap<ProperFilename, Vec<RunDir>> =
225                HashMap::new();
226            for run_dir in run_dirs {
227                let schedule_condition_path = run_dir.to_path().append("schedule_condition.ron");
228                match std::fs::read_to_string(&schedule_condition_path) {
229                    Ok(s) => {
230                        let schedule_condition: ScheduleCondition = ron::from_str(&s)
231                            .map_err(ctx!("reading file {schedule_condition_path:?}"))?;
232                        if let Some(situation) = schedule_condition.situation() {
233                            // XX it's just too long, proper abstraction pls?
234                            match job_output_dirs_by_situation.entry(situation.clone()) {
235                                Entry::Occupied(mut occupied_entry) => {
236                                    occupied_entry.get_mut().push(run_dir);
237                                }
238                                Entry::Vacant(vacant_entry) => {
239                                    vacant_entry.insert(vec![run_dir]);
240                                }
241                            }
242                        }
243                    }
244                    Err(e) => match e.kind() {
245                        std::io::ErrorKind::NotFound => (),
246                        _ => Err(e).map_err(ctx!("reading file {schedule_condition_path:?}"))?,
247                    },
248                }
249            }
250
251            for (situation, job_output_dirs) in job_output_dirs_by_situation.iter() {
252                if !no_summary_stats {
253                    generate_all_summaries_for_situation(
254                        Some(situation),
255                        &key_dir,
256                        job_output_dirs.as_slice(),
257                    )?;
258                }
259            }
260        }
261        Ok(())
262    }
263}