evobench_tools/run/
key.rs

1//! The key for a benchmarking run--all parameters modifying the output
2//!
//! The representation of the parameters (given or recorded) for a
//! benchmarking run. This does not necessarily include *all* recorded
//! metainformation. The aim is to allow automatically grouping runs
//! for which a sensible `summary` can be calculated, and to allow
//! specifying sets of runs for which the `change` should be
//! calculated. It should perhaps be fine-grained enough to e.g. allow
//! including runs from different hosts if their CPU and memory
//! configuration is identical, and perhaps provide the means for
//! manual overrides, to include all runs in a summary with keys
//! "close enough".
13//!
14//! Some parameters, e.g. hostname, may be irrelevant when the
15//! hardware and software versions are given; or it may turn out
16//! controlling for those is not enough; thus, some key pieces are
17//! redundant, or not?
18//!
19//! Time-of-day may be relevant (rather: were other processes shut
20//! down or not), strongly or weakly, but can't be part of the key or
21//! grouping would not work; this is a piece of information to track
22//! separately for verification.
23//!
24//! Custom parameters can be given and be relevant, e.g. whether
25//! providing input data to an application sorted or not.
26
27use std::{
28    collections::BTreeMap,
29    fmt::Display,
30    num::NonZeroU32,
31    path::{Path, PathBuf},
32    str::FromStr,
33    sync::Arc,
34};
35
36use anyhow::{Result, bail};
37use itertools::Itertools;
38use kstring::KString;
39use serde::{Deserialize, Serialize};
40
41use crate::{
42    ctx,
43    git::GitHash,
44    io_utils::bash::bash_export_variable_string,
45    key_val_fs::as_key::AsKey,
46    run::{
47        config::BenchmarkingCommand,
48        custom_parameter::{AllowedCustomParameter, CustomParameterValue},
49        env_vars::AllowableCustomEnvVar,
50        output_directory::structure::KeyDir,
51    },
52    serde_types::allowed_env_var::AllowedEnvVar,
53    utillib::crypto_hash::crypto_hash,
54};
55
/// Operating system identification of a host, kept as three free-form
/// strings.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct OsInfo {
    /// e.g. taken from `UName.sysname`
    pub os: String, // XX use the enum from other lib, move
    /// e.g. "6.1.0-37-amd64"
    pub release: String,
    /// e.g. "#1 SMP PREEMPT_DYNAMIC Debian 6.1.140-1 (2025-05-22)"
    pub version: String,
}
66
/// Information that together should allow a host to be determined to
/// be equivalent.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct HostInfo {
    /// CPU model description string.
    pub cpu_model: String,
    /// Number of cores; the type guarantees it is never zero.
    /// NOTE(review): whether this counts logical or physical cores is
    /// not visible here -- confirm against the code that fills it in.
    pub num_cores: NonZeroU32,
    // ram_kb: NonZeroU32,
    // and swap?
    // ^ Both irrelevant as long as there's *enough* RAM.
    // XX Thus, log these things (together with free mem
    // before/during?/after? time of evaluation), then allow to
    // correlate, but don't make it part of the key, OK?
    /// Operating system identification of the host, see [`OsInfo`].
    pub os_info: OsInfo,
}
82
/// A concrete host: its name plus the [`HostInfo`] describing its
/// hardware and operating system.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Host {
    /// e.g. taken from `UName.nodename`
    pub hostname: String,
    /// Description of the host that is meant to allow judging whether
    /// two hosts are equivalent.
    pub host_info: HostInfo,
}
90
/// As determined by evobench (but should compare to duplicates
/// of some of the fields in the bench log file resulting from a run)
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct EarlyContext {
    /// The host the information was determined on.
    pub host: Host,
    /// A user name.
    /// NOTE(review): presumably the user the benchmarking run is
    /// executed as -- confirm against the code that fills it in.
    pub username: String,
}
99
/// Same as `CustomParameters` but not checked against a config. Keys
/// are checked for basic correctness but not whether usable for a
/// particular benchmark, and the values are of unknown type.
///
/// Newtype around a `BTreeMap`, so keys are unique and iteration
/// happens in key order. Note that `Clone` is not derived.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct UncheckedCustomParameters(BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString>);
105
106impl From<BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString>> for UncheckedCustomParameters {
107    fn from(value: BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString>) -> Self {
108        Self(value)
109    }
110}
111
112impl UncheckedCustomParameters {
113    pub fn btree_map(&self) -> &BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString> {
114        &self.0
115    }
116}
117
/// Custom key/value pairings, passed on as environment variables when
/// executing the benchmarking runner of the target project. These
/// are checked for allowed and required values.
///
/// Newtype around a `BTreeMap`, so keys are unique and iteration
/// happens in key order. Use `CustomParameters::checked_from` to
/// build an instance validated against a configuration; the `From`
/// conversion wraps a map without any checking. Note that `Clone` is
/// not derived.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct CustomParameters(BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, CustomParameterValue>);
123
/// Append one path segment of the form `key=val` to `path` for every
/// pair yielded by `key_vals`, in iteration order. The result is the
/// folder to use for files for this run. TEMPORARY solution.
fn extend_path<'a>(path: &mut PathBuf, key_vals: impl Iterator<Item = (&'a str, &'a str)> + 'a) {
    // No validation is done here. Per the original author's note, the
    // types the keys and values come from already guarantee that
    // `key.len() + 1 + val.len()` fits into the 255 bytes of max file
    // name length on Linux and that no \0 is contained (both enforced
    // at construction time); since a `=` is interpolated there are no
    // remaining invalid cases.
    // NOTE(review): that guarantee lives outside this function --
    // verify when calling with other string sources.
    path.extend(key_vals.map(|(key, val)| format!("{key}={val}")));
}
136
137pub trait ExtendPath {
138    fn key_val_strs(&self) -> impl Iterator<Item = (&str, &str)>;
139
140    fn extend_path(&self, mut path: PathBuf) -> PathBuf {
141        extend_path(&mut path, self.key_val_strs());
142        path
143    }
144}
145
146impl ExtendPath for UncheckedCustomParameters {
147    fn key_val_strs(&self) -> impl Iterator<Item = (&str, &str)> {
148        self.0.iter().map(|(k, v)| (k.as_str(), v.as_str()))
149    }
150}
151
152impl From<BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, CustomParameterValue>>
153    for CustomParameters
154{
155    fn from(value: BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, CustomParameterValue>) -> Self {
156        Self(value)
157    }
158}
159
160impl ExtendPath for CustomParameters {
161    fn key_val_strs(&self) -> impl Iterator<Item = (&str, &str)> {
162        self.0.iter().map(|(k, v)| (k.as_str(), v.as_str()))
163    }
164}
165
166impl CustomParameters {
167    pub fn btree_map(
168        &self,
169    ) -> &BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, CustomParameterValue> {
170        &self.0
171    }
172
173    pub fn keyvals(&self) -> BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString> {
174        self.btree_map()
175            .iter()
176            .map(|(k, v)| (k.clone(), KString::from_ref(v.as_str())))
177            .collect()
178    }
179
180    pub fn extend_path(&self, mut path: PathBuf) -> PathBuf {
181        extend_path(&mut path, self.key_val_strs());
182        path
183    }
184
185    pub fn checked_from(
186        keyvals: &BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, KString>,
187        custom_parameters_required: &BTreeMap<
188            AllowedEnvVar<AllowableCustomEnvVar>,
189            AllowedCustomParameter,
190        >,
191    ) -> Result<Self> {
192        // XX: keyvals.iter() is never containing duplicates, now that
193        // this is a BTreeMap they are removed silently by serde
194        // (bummer!)
195        let mut res = BTreeMap::new();
196        for kv in keyvals {
197            let (key, value) = kv;
198            if res.contains_key(key) {
199                bail!("duplicated custom parameter with name {:?}", key.as_str())
200            }
201            let allowable_key: AllowedEnvVar<AllowableCustomEnvVar> = AllowedEnvVar::from_str(key)?;
202            if let Some(allowed_custom_parameter) = custom_parameters_required.get(&allowable_key) {
203                let val =
204                    CustomParameterValue::checked_from(allowed_custom_parameter.r#type, value)
205                        .map_err(ctx!("for variable {:?}", key.as_str()))?;
206
207                res.insert(key.clone(), val);
208            } else {
209                let valid_params = custom_parameters_required
210                    .keys()
211                    .map(|key| format!("{:?}", key.as_str()))
212                    .join(", ");
213                bail!(
214                    "invalid custom parameter name {:?} (valid are: {valid_params})",
215                    key.as_str()
216                )
217            }
218        }
219        for (key, allowed_custom_parameter) in custom_parameters_required.iter() {
220            if allowed_custom_parameter.required {
221                if !res.contains_key(key) {
222                    bail!("missing custom parameter with name {:?}", key.as_str())
223                }
224            }
225        }
226
227        Ok(CustomParameters(res))
228    }
229
230    pub fn to_bash_export_strings(&self, prefix: &str, suffix: &str) -> Vec<String> {
231        self.btree_map()
232            .iter()
233            .map(|(k, v)| bash_export_variable_string(k, v.as_str(), prefix, suffix))
234            .collect()
235    }
236}
237
238fn display_for_btreemap<S: AsRef<str>>(
239    slf: &BTreeMap<AllowedEnvVar<AllowableCustomEnvVar>, S>,
240    f: &mut std::fmt::Formatter<'_>,
241) -> std::fmt::Result {
242    let mut is_first = true;
243    for (k, custom_parameter_value) in slf {
244        let v = (*custom_parameter_value).as_ref();
245        write!(f, "{}{k}={v}", if is_first { "" } else { "," })?;
246        is_first = false;
247    }
248    Ok(())
249}
250
251impl Display for CustomParameters {
252    /// Nice view for humans with commas, used in "evobench list
253    /// separated"
254    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
255        display_for_btreemap(self.btree_map(), f)
256    }
257}
258
259impl Display for UncheckedCustomParameters {
260    /// Nice view for humans with commas, used in "evobench list
261    /// separated"
262    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
263        display_for_btreemap(self.btree_map(), f)
264    }
265}
266
/// The parameters of a run: which commit, and with which custom
/// parameters.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct RunParameters {
    /// The Git commit the run is for.
    pub commit_id: GitHash,
    /// The checked custom parameters; held behind an `Arc`, so
    /// cloning a `RunParameters` shares them instead of copying
    /// (`CustomParameters` itself does not derive `Clone`).
    pub custom_parameters: Arc<CustomParameters>,
}
273
/// Only the parts of a BenchmarkingJob that determine results--but
/// also excluding schedule_condition, which *does* have a conscious
/// influence on results, but comes from the configured pipeline, not
/// the insertion. This here is used for insertion uniqueness
/// checking, but maybe also for key determination later (XX todo).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)]
pub struct BenchmarkingJobParameters {
    /// Commit and custom parameters of the job, shared via `Arc`.
    pub run_parameters: Arc<RunParameters>,
    /// NOTE that BenchmarkingCommand has both `target_name` and the
    /// actual values; we currently make our key be based on *both* at
    /// the same time! I.e. a job with the same actual values but
    /// different `target_name` will be treated as another key!
    /// (FUTURE: is this really right?)
    pub command: Arc<BenchmarkingCommand>,
}
289
290impl BenchmarkingJobParameters {
291    pub fn slow_hash(&self) -> BenchmarkingJobParametersHash {
292        self.into()
293    }
294
295    pub fn to_key_dir(&self, output_base_dir: Arc<Path>) -> Arc<KeyDir> {
296        KeyDir::from_benchmarking_job_parameters(output_base_dir, self)
297    }
298}
299
/// Hash of a `BenchmarkingJobParameters` value, held as a string.
/// Created via `From<&BenchmarkingJobParameters>` (which uses
/// `crypto_hash`), and usable as a file name through its `AsKey`
/// implementation.
#[derive(Debug, PartialEq, Clone, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct BenchmarkingJobParametersHash(String);
302
303impl From<&BenchmarkingJobParameters> for BenchmarkingJobParametersHash {
304    fn from(value: &BenchmarkingJobParameters) -> Self {
305        Self(crypto_hash(value))
306    }
307}
308
309impl AsKey for BenchmarkingJobParametersHash {
310    fn as_filename_str(&self) -> std::borrow::Cow<'_, str> {
311        (&self.0).into()
312    }
313
314    fn try_from_filename_str(file_name: &str) -> Option<Self> {
315        Some(Self(file_name.into()))
316    }
317}
318
/// As output by the benchmark runner of the target project (currently
/// always the evobench-probes library)
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LateContext {
    /// Taken from `log_message::Metadata`: including version, as determined
    /// by evobench-probes, e.g. "GCC 12.2.0"
    pub compiler: String,
}
328
/// The complete key for a benchmarking run, combining the context
/// known before the run, the requested run parameters, and the
/// context learned after the run.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Key {
    /// Info gleaned by evobench before executing a run.
    pub early_context: EarlyContext,
    /// Parameters requested by the user and passed to the benchmark
    /// runner of the target project.
    pub run_parameters: RunParameters,
    /// Info gleaned by evobench from the output file of the
    /// evobench-probes library after executing a run.
    pub late_context: LateContext,
}