1use std::path::{Path, PathBuf};
2
3use anyhow::{Result, anyhow, bail};
4use chrono::{DateTime, FixedOffset};
5use cj_path_util::path_util::AppendToPath;
6use itertools::Itertools;
7use regex::Regex;
8
9use crate::{
10 ctx,
11 git::GitHash,
12 info,
13 run::{
14 command_log_file::{CommandLog, CommandLogFile, ParseCommandLogError},
15 env_vars::AllowableCustomEnvVar,
16 key::{BenchmarkingJobParameters, RunParameters},
17 },
18 serde_types::{
19 allowed_env_var::AllowedEnvVar, proper_dirname::ProperDirname,
20 proper_filename::ProperFilename,
21 },
22 times::{NanoTime, ToStringSeconds},
23 warn,
24};
25
26pub struct Timing {
28 timestamp: DateTime<FixedOffset>,
29 lineno: usize,
30 rest: String,
31}
32
33pub struct Span {
35 start: Timing,
36 end: Timing,
37}
38
39impl Span {
40 fn duration(&self) -> chrono::Duration {
42 let Self { start, end } = self;
43 end.timestamp.signed_duration_since(start.timestamp)
44 }
45
46 fn duration_nanotime(&self) -> Result<NanoTime> {
48 let duration = self.duration();
49 let ns: i64 = duration
50 .num_nanoseconds()
51 .ok_or_else(|| anyhow!("time span does not fit u64 nanoseconds"))?;
52 let ns =
53 u64::try_from(ns).map_err(ctx!("trying to convert duration to unsigned number"))?;
54
55 NanoTime::from_nsec(ns).ok_or_else(|| anyhow!("duration too large to fit NanoTime"))
56 }
57}
58
59pub struct GrepDiffRegion {
60 pub regex_start: Regex,
61 pub regex_end: Regex,
62}
63
64impl GrepDiffRegion {
65 pub fn from_strings(regex_start: &str, regex_end: &str) -> Result<Self> {
66 let (regex_start, regex_end) = (
67 Regex::new(regex_start).map_err(ctx!("compiling start regex {regex_start:?}"))?,
68 Regex::new(regex_end).map_err(ctx!("compiling end regex {regex_end:?}"))?,
69 );
70 Ok(Self {
71 regex_start,
72 regex_end,
73 })
74 }
75
76 pub fn find_matching_spans_for(
79 &self,
80 (log_contents, lineno): (&str, usize),
81 ) -> Result<Vec<Span>, Timing> {
82 let mut spans = Vec::new();
83 let mut lines = log_contents.split('\n').enumerate();
84 while let Some((lineno0, line)) = lines.next() {
85 if let Some((t, rest)) = line.split_once('\t') {
86 if self.regex_start.is_match(rest) {
87 if let Ok(timestamp) = DateTime::parse_from_rfc3339(t) {
88 let start = Timing {
89 timestamp,
90 lineno: lineno + lineno0,
91 rest: rest.into(),
92 };
93 'inner: {
94 while let Some((lineno0, line)) = lines.next() {
95 if let Some((t, rest)) = line.split_once('\t') {
96 if self.regex_end.is_match(rest) {
97 if let Ok(timestamp) = DateTime::parse_from_rfc3339(t) {
98 let end = Timing {
99 timestamp,
100 lineno: lineno + lineno0,
101 rest: rest.into(),
102 };
103 spans.push(Span { start, end });
104 break 'inner;
105 }
106 }
107 }
108 }
109 return Err(start);
110 }
111 }
112 }
113 }
114 }
115 Ok(spans)
116 }
117
118 pub fn find_duration_for<P: AsRef<Path>>(
120 &self,
121 command_log: &CommandLog<P>,
122 ) -> Result<Option<Span>> {
123 match self.find_matching_spans_for(command_log.log_contents_rest()) {
124 Ok(mut spans) => match spans.len() {
125 0 => {
126 warn!("file {:?} has no match", command_log.path());
127 Ok(None)
128 }
129 1 => Ok(spans.pop()),
130 _ => {
131 let msg = spans
132 .iter()
133 .map(|Span { start, end }| {
134 format!("lines {} to {}", start.lineno, end.lineno)
135 })
136 .join(", ");
137 bail!(
138 "file {:?} has more than one match: {msg}",
139 command_log.path()
140 );
141 }
142 },
143 Err(Timing {
144 timestamp,
145 lineno,
146 rest,
147 }) => {
148 warn!(
149 "file {}:{} matches start but no end thereafter: {timestamp}\t{rest}",
150 command_log.path_string_lossy(),
151 lineno
152 );
153 Ok(None)
154 }
155 }
156 }
157
158 pub fn grep_diff(
159 &self,
160 logfiles: Vec<PathBuf>,
161 commit_filter: Option<GitHash>,
162 target_name_filter: Option<ProperDirname>,
163 params_filter: Option<String>,
164 ) -> Result<()> {
165 let params_filter = if let Some(params_filter) = ¶ms_filter {
166 let mut keyvals = Vec::new();
167 for keyval in params_filter.split('/') {
168 let (key, val) = keyval.split_once('=').ok_or_else(|| {
169 anyhow!("missing '=' in variable key-value pair string {keyval:?}")
170 })?;
171 let key: AllowedEnvVar<AllowableCustomEnvVar> = key.parse()?;
172 keyvals.push((key, val));
173 }
174 keyvals
175 } else {
176 Vec::new()
177 };
178
179 'logfile: for logfile in &logfiles {
180 let command_log_file = CommandLogFile { path: logfile };
181 let command_log = command_log_file.command_log()?;
182 let BenchmarkingJobParameters {
183 run_parameters,
184 command,
185 } = match command_log.parse_log_file_params() {
186 Ok(params) => params,
187 Err(e) => match e {
188 ParseCommandLogError::MissingHead(_) => {
189 warn!("file {logfile:?} has no log file info header, skipping");
190 continue 'logfile;
191 }
192 e => Err(e)?,
193 },
194 };
195 #[allow(unused)]
196 let log_contents = ();
197
198 let RunParameters {
199 commit_id,
200 custom_parameters,
201 } = &*run_parameters;
202 let target_name = &command.target_name;
203
204 if let Some(commit) = &commit_filter {
207 if commit != commit_id {
208 info!("file {logfile:?} does not match commit");
209 continue 'logfile;
210 }
211 }
212
213 if let Some(target_name_filter) = &target_name_filter {
214 if target_name_filter != target_name {
215 info!("file {logfile:?} does not match target name");
216 continue 'logfile;
217 }
218 }
219
220 for (key, val) in ¶ms_filter {
221 if let Some(log_val) = custom_parameters.btree_map().get(key) {
222 if *val != log_val.as_str() {
223 info!("file {logfile:?} does not match custom variable '{key}'='{val}'");
224 continue 'logfile;
225 }
226 } else {
227 info!("file {logfile:?} does not use custom variable '{key}'");
228 continue 'logfile;
229 }
230 }
231
232 if let Some(span) = self.find_duration_for(&command_log)? {
233 let duration = span.duration_nanotime().map_err(ctx!(
234 "file {}:{} to {}",
235 command_log.path_string_lossy(),
236 span.start.lineno,
237 span.end.lineno
238 ))?;
239
240 let logfile_str = logfile.to_string_lossy();
241 println!(
242 "{}\t{commit_id}\t{}\t{custom_parameters}\t{logfile_str}:{}",
243 duration.to_string_seconds(),
244 target_name.as_str(),
245 span.start.lineno
246 );
247 }
248 }
249
250 Ok(())
251 }
252}
253
254#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
255pub struct LogExtract {
256 pub filename: ProperFilename,
257 pub regex_start: String,
258 pub regex_end: String,
259}
260
261impl LogExtract {
262 pub fn extract_seconds_from<P: AsRef<Path>>(
266 &self,
267 command_log: &CommandLog<P>,
268 output_base_dir: &Path,
269 ) -> Result<()> {
270 let Self {
271 filename,
272 regex_start,
273 regex_end,
274 } = self;
275 let grep_diff_region = GrepDiffRegion::from_strings(regex_start, regex_end)?;
276 if let Some(span) = grep_diff_region.find_duration_for(command_log)? {
277 let output_path = output_base_dir.append(filename.as_ref());
278 let duration = span.duration_nanotime().map_err(ctx!(
280 "file {}:{} to {}",
281 command_log.path_string_lossy(),
282 span.start.lineno,
283 span.end.lineno
284 ))?;
285
286 let mut contents = duration.to_string_seconds();
287 contents.push_str("\n");
288 std::fs::write(&output_path, contents).map_err(ctx!("writing to {output_path:?}"))?;
289 } else {
290 info!("span {regex_start:?} .. {regex_end:?} for {filename:?} not found");
291 }
292 Ok(())
293 }
294}