evobench_tools/util/
grep_diff.rs

1use std::path::{Path, PathBuf};
2
3use anyhow::{Result, anyhow, bail};
4use chrono::{DateTime, FixedOffset};
5use cj_path_util::path_util::AppendToPath;
6use itertools::Itertools;
7use regex::Regex;
8
9use crate::{
10    ctx,
11    git::GitHash,
12    info,
13    run::{
14        command_log_file::{CommandLog, CommandLogFile, ParseCommandLogError},
15        env_vars::AllowableCustomEnvVar,
16        key::{BenchmarkingJobParameters, RunParameters},
17    },
18    serde_types::{
19        allowed_env_var::AllowedEnvVar, proper_dirname::ProperDirname,
20        proper_filename::ProperFilename,
21    },
22    times::{NanoTime, ToStringSeconds},
23    warn,
24};
25
26// != evaluator::data::log_data::Timing
27pub struct Timing {
28    timestamp: DateTime<FixedOffset>,
29    lineno: usize,
30    rest: String,
31}
32
33// != evaluator::data::log_data_tree::Span
34pub struct Span {
35    start: Timing,
36    end: Timing,
37}
38
39impl Span {
40    /// Could be negative for invalid logfiles
41    fn duration(&self) -> chrono::Duration {
42        let Self { start, end } = self;
43        end.timestamp.signed_duration_since(start.timestamp)
44    }
45
46    /// Returns errors for durations that are negative or too large
47    fn duration_nanotime(&self) -> Result<NanoTime> {
48        let duration = self.duration();
49        let ns: i64 = duration
50            .num_nanoseconds()
51            .ok_or_else(|| anyhow!("time span does not fit u64 nanoseconds"))?;
52        let ns =
53            u64::try_from(ns).map_err(ctx!("trying to convert duration to unsigned number"))?;
54
55        NanoTime::from_nsec(ns).ok_or_else(|| anyhow!("duration too large to fit NanoTime"))
56    }
57}
58
59pub struct GrepDiffRegion {
60    pub regex_start: Regex,
61    pub regex_end: Regex,
62}
63
64impl GrepDiffRegion {
65    pub fn from_strings(regex_start: &str, regex_end: &str) -> Result<Self> {
66        let (regex_start, regex_end) = (
67            Regex::new(regex_start).map_err(ctx!("compiling start regex {regex_start:?}"))?,
68            Regex::new(regex_end).map_err(ctx!("compiling end regex {regex_end:?}"))?,
69        );
70        Ok(Self {
71            regex_start,
72            regex_end,
73        })
74    }
75
76    /// Returns the start-`Timing` as an error if no match for `regex_end`
77    /// was found after it.
78    pub fn find_matching_spans_for(
79        &self,
80        (log_contents, lineno): (&str, usize),
81    ) -> Result<Vec<Span>, Timing> {
82        let mut spans = Vec::new();
83        let mut lines = log_contents.split('\n').enumerate();
84        while let Some((lineno0, line)) = lines.next() {
85            if let Some((t, rest)) = line.split_once('\t') {
86                if self.regex_start.is_match(rest) {
87                    if let Ok(timestamp) = DateTime::parse_from_rfc3339(t) {
88                        let start = Timing {
89                            timestamp,
90                            lineno: lineno + lineno0,
91                            rest: rest.into(),
92                        };
93                        'inner: {
94                            while let Some((lineno0, line)) = lines.next() {
95                                if let Some((t, rest)) = line.split_once('\t') {
96                                    if self.regex_end.is_match(rest) {
97                                        if let Ok(timestamp) = DateTime::parse_from_rfc3339(t) {
98                                            let end = Timing {
99                                                timestamp,
100                                                lineno: lineno + lineno0,
101                                                rest: rest.into(),
102                                            };
103                                            spans.push(Span { start, end });
104                                            break 'inner;
105                                        }
106                                    }
107                                }
108                            }
109                            return Err(start);
110                        }
111                    }
112                }
113            }
114        }
115        Ok(spans)
116    }
117
118    // Extract the single expected time span
119    pub fn find_duration_for<P: AsRef<Path>>(
120        &self,
121        command_log: &CommandLog<P>,
122    ) -> Result<Option<Span>> {
123        match self.find_matching_spans_for(command_log.log_contents_rest()) {
124            Ok(mut spans) => match spans.len() {
125                0 => {
126                    warn!("file {:?} has no match", command_log.path());
127                    Ok(None)
128                }
129                1 => Ok(spans.pop()),
130                _ => {
131                    let msg = spans
132                        .iter()
133                        .map(|Span { start, end }| {
134                            format!("lines {} to {}", start.lineno, end.lineno)
135                        })
136                        .join(", ");
137                    bail!(
138                        "file {:?} has more than one match: {msg}",
139                        command_log.path()
140                    );
141                }
142            },
143            Err(Timing {
144                timestamp,
145                lineno,
146                rest,
147            }) => {
148                warn!(
149                    "file {}:{} matches start but no end thereafter: {timestamp}\t{rest}",
150                    command_log.path_string_lossy(),
151                    lineno
152                );
153                Ok(None)
154            }
155        }
156    }
157
158    pub fn grep_diff(
159        &self,
160        logfiles: Vec<PathBuf>,
161        commit_filter: Option<GitHash>,
162        target_name_filter: Option<ProperDirname>,
163        params_filter: Option<String>,
164    ) -> Result<()> {
165        let params_filter = if let Some(params_filter) = &params_filter {
166            let mut keyvals = Vec::new();
167            for keyval in params_filter.split('/') {
168                let (key, val) = keyval.split_once('=').ok_or_else(|| {
169                    anyhow!("missing '=' in variable key-value pair string {keyval:?}")
170                })?;
171                let key: AllowedEnvVar<AllowableCustomEnvVar> = key.parse()?;
172                keyvals.push((key, val));
173            }
174            keyvals
175        } else {
176            Vec::new()
177        };
178
179        'logfile: for logfile in &logfiles {
180            let command_log_file = CommandLogFile { path: logfile };
181            let command_log = command_log_file.command_log()?;
182            let BenchmarkingJobParameters {
183                run_parameters,
184                command,
185            } = match command_log.parse_log_file_params() {
186                Ok(params) => params,
187                Err(e) => match e {
188                    ParseCommandLogError::MissingHead(_) => {
189                        warn!("file {logfile:?} has no log file info header, skipping");
190                        continue 'logfile;
191                    }
192                    e => Err(e)?,
193                },
194            };
195            #[allow(unused)]
196            let log_contents = ();
197
198            let RunParameters {
199                commit_id,
200                custom_parameters,
201            } = &*run_parameters;
202            let target_name = &command.target_name;
203
204            // Filter according to given filtering options
205
206            if let Some(commit) = &commit_filter {
207                if commit != commit_id {
208                    info!("file {logfile:?} does not match commit");
209                    continue 'logfile;
210                }
211            }
212
213            if let Some(target_name_filter) = &target_name_filter {
214                if target_name_filter != target_name {
215                    info!("file {logfile:?} does not match target name");
216                    continue 'logfile;
217                }
218            }
219
220            for (key, val) in &params_filter {
221                if let Some(log_val) = custom_parameters.btree_map().get(key) {
222                    if *val != log_val.as_str() {
223                        info!("file {logfile:?} does not match custom variable '{key}'='{val}'");
224                        continue 'logfile;
225                    }
226                } else {
227                    info!("file {logfile:?} does not use custom variable '{key}'");
228                    continue 'logfile;
229                }
230            }
231
232            if let Some(span) = self.find_duration_for(&command_log)? {
233                let duration = span.duration_nanotime().map_err(ctx!(
234                    "file {}:{} to {}",
235                    command_log.path_string_lossy(),
236                    span.start.lineno,
237                    span.end.lineno
238                ))?;
239
240                let logfile_str = logfile.to_string_lossy();
241                println!(
242                    "{}\t{commit_id}\t{}\t{custom_parameters}\t{logfile_str}:{}",
243                    duration.to_string_seconds(),
244                    target_name.as_str(),
245                    span.start.lineno
246                );
247            }
248        }
249
250        Ok(())
251    }
252}
253
254#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
255pub struct LogExtract {
256    pub filename: ProperFilename,
257    pub regex_start: String,
258    pub regex_end: String,
259}
260
261impl LogExtract {
262    /// Extract from the given command log the given parameters and
263    /// write the resulting time to the given filename below the
264    /// `output_base_dir`.
265    pub fn extract_seconds_from<P: AsRef<Path>>(
266        &self,
267        command_log: &CommandLog<P>,
268        output_base_dir: &Path,
269    ) -> Result<()> {
270        let Self {
271            filename,
272            regex_start,
273            regex_end,
274        } = self;
275        let grep_diff_region = GrepDiffRegion::from_strings(regex_start, regex_end)?;
276        if let Some(span) = grep_diff_region.find_duration_for(command_log)? {
277            let output_path = output_base_dir.append(filename.as_ref());
278            // XX copy-paste from `fn grep_diff`
279            let duration = span.duration_nanotime().map_err(ctx!(
280                "file {}:{} to {}",
281                command_log.path_string_lossy(),
282                span.start.lineno,
283                span.end.lineno
284            ))?;
285
286            let mut contents = duration.to_string_seconds();
287            contents.push_str("\n");
288            std::fs::write(&output_path, contents).map_err(ctx!("writing to {output_path:?}"))?;
289        } else {
290            info!("span {regex_start:?} .. {regex_end:?} for {filename:?} not found");
291        }
292        Ok(())
293    }
294}