1use std::{
7 collections::{BTreeSet, HashMap, HashSet},
8 fmt::{Debug, Display},
9 ops::Index,
10 path::Path,
11 process::{Command, Stdio},
12 sync::{Arc, Mutex},
13};
14
15use anyhow::{Context, Result, anyhow, bail};
16use itertools::Itertools;
17use kstring::KString;
18use smallvec::SmallVec;
19
20use crate::date_and_time::unixtime::Unixtime;
21pub use crate::serde_types::git_hash::GitHash;
22
23#[derive(Debug)]
24pub struct GitCommit<RefType> {
25 pub commit_hash: GitHash,
26 pub author_time: Unixtime,
27 pub committer_time: Unixtime,
28 pub parents: SmallVec<[RefType; 1]>,
29}
30
31#[derive(Debug)]
32pub struct EnrichedGitCommit<RefType> {
33 pub commit: GitCommit<RefType>,
34 pub depth: usize,
37}
38
39impl GitCommit<Id<ToEnrichedCommit>> {
40 pub fn with_ids_as_hashes(&self, data: &GitGraphData) -> GitCommit<GitHash> {
43 let Self {
44 commit_hash,
45 author_time,
46 committer_time,
47 parents,
48 } = self;
49
50 let parents = parents
51 .iter()
52 .map(|parent| data[*parent].commit.commit_hash.clone())
53 .collect();
54
55 GitCommit {
56 commit_hash: commit_hash.clone(),
57 author_time: *author_time,
58 committer_time: *committer_time,
59 parents,
60 }
61 }
62}
63
64impl EnrichedGitCommit<Id<ToEnrichedCommit>> {
65 pub fn with_ids_as_hashes(&self, data: &GitGraphData) -> EnrichedGitCommit<GitHash> {
68 let Self { commit, depth } = self;
69 let commit = commit.with_ids_as_hashes(data);
70 EnrichedGitCommit {
71 commit,
72 depth: *depth,
73 }
74 }
75}
76
77impl Display for GitCommit<GitHash> {
80 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81 let Self {
82 commit_hash,
83 author_time,
84 committer_time,
85 parents,
86 } = self;
87 let a = author_time.0;
88 let c = committer_time.0;
89 let parents = parents.iter().map(|p| p.to_string()).join(" ");
90 write!(f, "{commit_hash},{a},{c},{parents}")
91 }
92}
93
94impl Display for EnrichedGitCommit<GitHash> {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 let Self { commit, depth } = self;
98 write!(f, "{depth}\t{commit}")
99 }
100}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
103pub struct Id<Kind>(u32, Kind);
104
105#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
106pub struct ToEnrichedCommit;
107
108#[derive(Debug)]
112pub struct GitGraphData {
113 by_hash: HashMap<GitHash, Id<ToEnrichedCommit>>,
114 ecommits: Vec<EnrichedGitCommit<Id<ToEnrichedCommit>>>,
115}
116
117#[derive(thiserror::Error, Debug)]
118#[error("more than u32::max commits")]
119pub struct MoreThanU32Commits;
120
121impl Id<ToEnrichedCommit> {
122 pub fn get(self, data: &GitGraphData) -> Option<&EnrichedGitCommit<Id<ToEnrichedCommit>>> {
123 data.get(self)
124 }
125}
126
127impl GitGraphData {
128 fn new() -> Self {
129 Self {
130 by_hash: HashMap::new(),
131 ecommits: Vec::new(),
132 }
133 }
134
135 pub fn get(
136 &self,
137 id: Id<ToEnrichedCommit>,
138 ) -> Option<&EnrichedGitCommit<Id<ToEnrichedCommit>>> {
139 self.ecommits
140 .get(usize::try_from(id.0).expect("at least 32 bit platform"))
141 }
142
143 pub fn get_by_hash(&self, h: &GitHash) -> Option<Id<ToEnrichedCommit>> {
144 self.by_hash.get(h).copied()
145 }
146
147 pub fn unchecked_push(
151 &mut self,
152 commit: GitCommit<Id<ToEnrichedCommit>>,
153 ) -> Result<Id<ToEnrichedCommit>, MoreThanU32Commits> {
154 let commit_hash = commit.commit_hash.clone();
155
156 let depth = commit
157 .parents
158 .iter()
159 .map(|parent_id| self.get(*parent_id).expect("XXX").depth)
160 .max()
161 .map(|m| m + 1)
162 .unwrap_or(0);
163
164 let ecommit = EnrichedGitCommit { commit, depth };
165 let id = self.ecommits.len();
166 let id = Id(
167 u32::try_from(id).map_err(|_| MoreThanU32Commits)?,
168 ToEnrichedCommit,
169 );
170 self.by_hash.insert(commit_hash, id);
171 self.ecommits.push(ecommit);
172 Ok(id)
173 }
174
175 pub fn history_as_btreeset_from(
176 &self,
177 mut id: Id<ToEnrichedCommit>,
178 ) -> BTreeSet<Id<ToEnrichedCommit>> {
179 let mut ids = BTreeSet::new();
180 let mut stack_of_commits_to_follow = Vec::new();
181 loop {
182 if ids.insert(id) {
183 let ecommit = &self[id];
184 let mut parents = ecommit.commit.parents.iter();
185 if let Some(first_parent) = parents.next() {
186 id = *first_parent;
187 for parent in parents {
188 stack_of_commits_to_follow.push(*parent);
189 }
190 continue;
191 }
192 }
193 if let Some(to_follow) = stack_of_commits_to_follow.pop() {
194 id = to_follow;
195 } else {
196 break;
197 }
198 }
199 ids
200 }
201
202 pub fn closest_matching_ancestor_of(
208 &self,
209 for_id: Id<ToEnrichedCommit>,
210 is_match: impl Fn(Id<ToEnrichedCommit>) -> bool,
211 ) -> Result<Option<Id<ToEnrichedCommit>>> {
212 let mut seen_commit_ids = HashSet::new();
216 seen_commit_ids.insert(for_id);
217
218 let mut current_commits = HashSet::new();
220 self.get(for_id)
221 .ok_or_else(|| anyhow!("invalid parent_id"))?;
222 current_commits.insert(for_id);
223
224 let get_id = |id: Id<ToEnrichedCommit>| self.get(id).expect("internal consistency");
225
226 while !current_commits.is_empty() {
227 if let Some(matching_id) = current_commits
230 .iter()
231 .copied()
232 .filter(|id| is_match(*id))
233 .max_by_key(|id| {
234 let commit = get_id(*id);
235 commit.commit.committer_time
236 })
237 {
238 return Ok(Some(matching_id));
239 }
240 let commit_id_to_follow = current_commits
243 .iter()
244 .copied()
245 .max_by_key(|id| get_id(*id).commit.committer_time)
246 .expect("exiting before here when current_commits is empty");
247 current_commits.remove(&commit_id_to_follow);
248 let commit_to_follow = get_id(commit_id_to_follow);
249 for commit_id in &commit_to_follow.commit.parents {
250 if !seen_commit_ids.contains(commit_id) {
251 current_commits.insert(*commit_id);
252 seen_commit_ids.insert(*commit_id);
253 }
254 }
255 }
256 Ok(None)
257 }
258
259 pub fn sorted_by<T: Ord>(
260 &self,
261 commits: &BTreeSet<Id<ToEnrichedCommit>>,
262 mut by: impl FnMut(&EnrichedGitCommit<Id<ToEnrichedCommit>>) -> T,
263 ) -> Vec<Id<ToEnrichedCommit>> {
264 let mut vec: Vec<_> = commits.iter().copied().collect();
265 vec.sort_by_key(|id| by(&self[*id]));
266 vec
267 }
268
269 pub fn ids_as_commits<'s: 'ids, 'ids>(
270 &'s self,
271 ids: &'ids [Id<ToEnrichedCommit>],
272 ) -> impl DoubleEndedIterator<Item = &'s EnrichedGitCommit<Id<ToEnrichedCommit>>>
273 + ExactSizeIterator
274 + use<'s, 'ids> {
275 ids.iter().map(|id| &self[*id])
276 }
277
278 pub fn commits(&self) -> &[EnrichedGitCommit<Id<ToEnrichedCommit>>] {
279 &self.ecommits
280 }
281
282 pub fn add_history_from_dir_ref(
283 &mut self,
284 in_directory: impl AsRef<Path>,
285 entry_reference: &str,
286 ) -> Result<GitEntrypoint> {
287 let in_directory = in_directory.as_ref();
288 let commits = git_log_commits(in_directory, entry_reference)?;
292 if commits.is_empty() {
293 bail!("invalid Git reference {entry_reference:?} in Git dir {in_directory:?}")
294 }
295 Ok(GitEntrypoint::from_commits(
296 KString::from_ref(entry_reference),
297 commits.iter().rev(),
298 self,
299 )?)
300 }
301}
302
303impl Index<Id<ToEnrichedCommit>> for GitGraphData {
304 type Output = EnrichedGitCommit<Id<ToEnrichedCommit>>;
305
306 fn index(&self, index: Id<ToEnrichedCommit>) -> &Self::Output {
307 &self.ecommits[usize::try_from(index.0).expect("usize must be at least 32 bit wide")]
308 }
309}
310
311#[derive(Debug)]
312pub struct GitGraph(Mutex<GitGraphData>);
313
314impl GitGraph {
315 pub fn new() -> Arc<Self> {
316 Self(Mutex::new(GitGraphData::new())).into()
317 }
318
319 pub fn lock(&self) -> std::sync::MutexGuard<'_, GitGraphData> {
320 match self.0.lock() {
321 Ok(l) => l,
322 Err(l) => l.into_inner(),
325 }
326 }
327}
328
329#[derive(Debug)]
330pub struct GitEntrypoint {
331 pub name: KString,
332 pub commit_id: Id<ToEnrichedCommit>,
333}
334
335impl GitEntrypoint {
336 pub fn from_commits<'c>(
340 entry_reference: KString,
341 commits: impl Iterator<Item = &'c GitCommit<GitHash>>,
342 graph_lock: &mut GitGraphData,
343 ) -> Result<Self, MoreThanU32Commits> {
344 let mut entry_commit_id: Option<Id<ToEnrichedCommit>> = None;
345 for GitCommit {
346 commit_hash,
347 author_time,
348 committer_time,
349 parents,
350 } in commits
351 {
352 if let Some(id) = graph_lock.get_by_hash(commit_hash) {
354 entry_commit_id = Some(id);
358 } else {
359 let commit = GitCommit {
360 commit_hash: commit_hash.clone(),
361 author_time: *author_time,
362 committer_time: *committer_time,
363 parents: parents
364 .iter()
365 .map(|parent| {
366 graph_lock.get_by_hash(parent).unwrap_or_else(|| {
367 panic!(
368 "can't find parent {parent} of commit {commit_hash} -- \
369 need commits with the oldest first!"
370 )
371 })
372 })
373 .collect(),
374 };
375 entry_commit_id = Some(graph_lock.unchecked_push(commit)?);
376 }
377 }
378 Ok(GitEntrypoint {
379 name: entry_reference,
380 commit_id: entry_commit_id.expect("to be given non-empty commits iterator"),
381 })
382 }
383}
384
385pub fn git_log_commits(
398 in_directory: impl AsRef<Path>,
399 entry_reference: &str,
400) -> Result<Vec<GitCommit<GitHash>>> {
401 let in_directory = in_directory.as_ref();
402 let mut c = Command::new("git");
403
404 c.args(&[
405 "log",
406 &{
407 let commit_hash = "%H";
408 let author_time = "%at";
409 let committer_time = "%ct";
410 let parent_hashes = "%P";
411 format!("--pretty={commit_hash},{author_time},{committer_time},{parent_hashes}")
412 },
413 entry_reference,
414 ]);
415 let str_from_bytes =
416 |bs| std::str::from_utf8(bs).expect("git always gives ascii with given arguments");
417 c.current_dir(in_directory);
418
419 c.stdout(Stdio::piped());
420 let output = c
422 .output()
423 .with_context(|| anyhow!("in directory {in_directory:?}"))?;
424 if !output.status.success() {
425 let err = String::from_utf8_lossy(&output.stderr);
427 bail!("failure in git directory {in_directory:?}: {err}",)
428 }
429 output
430 .stdout
431 .split(|b| *b == b'\n')
432 .filter(|line| !line.is_empty())
433 .map(|line| -> Result<GitCommit<GitHash>> {
434 let items: SmallVec<[_; 4]> = line.split(|b| *b == b',').collect();
435 if let [commit_hash, author_time, committer_time, parents] = items.as_slice() {
436 let commit_hash = GitHash::try_from(*commit_hash)?;
437 let author_time = Unixtime(u64::from_str_radix(str_from_bytes(author_time), 10)?);
438 let committer_time =
439 Unixtime(u64::from_str_radix(str_from_bytes(committer_time), 10)?);
440 let parents: SmallVec<_> = parents
441 .split(|b| *b == b' ')
442 .into_iter()
443 .filter(|bs| !bs.is_empty())
444 .map(GitHash::try_from)
445 .collect::<Result<_>>()?;
446 Ok(GitCommit {
447 commit_hash,
448 author_time,
449 committer_time,
450 parents,
451 })
452 } else {
453 unreachable!("4 fields from git")
454 }
455 })
456 .collect()
457}
458
459#[cfg(test)]
460mod tests {
461 use super::*;
462
463 #[test]
464 fn t_() -> Result<()> {
465 let git_in_cwd = AsRef::<Path>::as_ref("../.git");
466 if !git_in_cwd.is_dir() {
467 eprintln!("git.rs: not tested due to not being in a Git repository");
468 return Ok(());
469 }
470 let graph = GitGraph::new();
471 let mut graph_guard = graph.lock();
472
473 let interesting_commit_ids = [
474 "710e7a2f4d48124964efbe48c901259ae31cfd6a",
476 "f73da5abcc389db7754715a9fecadb478ecfbc16",
478 "9fd0ad621328a11a984aaa0700d54d05af6a899a",
480 "d165351476db2d65c3efcda89b4a99decede3784",
481 ];
482
483 let refs = interesting_commit_ids.map(|name| {
486 graph_guard
487 .add_history_from_dir_ref(git_in_cwd, name)
488 .map_err(|e| e.to_string())
489 });
490
491 dbg!(&refs);
492
493 assert_eq!(
494 refs[2].as_ref().unwrap().name.as_ref(),
495 "9fd0ad621328a11a984aaa0700d54d05af6a899a"
496 );
497
498 let ids: [Id<ToEnrichedCommit>; _] = refs.map(|res| res.unwrap().commit_id);
499
500 let depths = ids.map(|id| id.get(&graph_guard).unwrap().depth);
501 assert_eq!(depths, [166, 163, 159, 161]);
502
503 let closest = graph_guard
506 .closest_matching_ancestor_of(ids[0], |id| (&ids[2..]).contains(&id))?
507 .expect("to find it");
508 assert_eq!(closest, Id(165, ToEnrichedCommit));
509 assert_eq!(
510 closest
511 .get(&graph_guard)
512 .expect("contained")
513 .commit
514 .commit_hash
515 .to_string(),
516 "9fd0ad621328a11a984aaa0700d54d05af6a899a"
517 );
518
519 Ok(())
520 }
521}