1pub mod average;
8pub mod weighted;
9
10use std::marker::PhantomData;
11use std::{borrow::Cow, str::FromStr};
12
13use anyhow::bail;
14use fixed::FixedU128;
15use fixed::traits::LossyFrom;
16use fixed::types::extra::U32;
17use num_traits::Zero;
18
19use super::{
20 stats::{
21 average::Average,
22 weighted::{IndexedNumbers, WeightedValue},
23 },
24 tables::table_view::{ColumnFormatting, Highlight, TableViewRow, Unit},
25};
26use crate::times::{MicroTime, NanoTime, ToStringMilliseconds};
27
28fn f64_from_fixed(f: FixedU128<U32>) -> f64 {
29 f.to_num()
30}
31
32fn u64_from_fixed(f: FixedU128<U32>) -> Option<u64> {
33 let x: FixedU128<U32> = f.round();
34 let n = u128::lossy_from(x);
35 n.try_into().ok()
36}
37
38fn absdiff(a: FixedU128<U32>, b: FixedU128<U32>) -> FixedU128<U32> {
39 if a > b { a - b } else { b - a }
40}
41
42fn square(x: FixedU128<U32>) -> FixedU128<U32> {
43 x * x
44}
45
46#[derive(Debug, PartialEq, Clone, Copy)]
49pub enum StatsField<const TILE_COUNT: usize> {
50 N,
51 Sum,
52 Average,
53 Median,
54 SD,
55 Tile(u32),
56}
57
58impl<const TILE_COUNT: usize> FromStr for StatsField<TILE_COUNT> {
59 type Err = anyhow::Error;
60
61 fn from_str(s: &str) -> Result<Self, Self::Err> {
62 use StatsField::*;
63 match s {
64 "n" | "N" => Ok(N),
65 "sum" | "Sum" | "total" | "Total" => Ok(Sum),
66 "average" | "Average" | "avg" | "mean" => Ok(Average),
67 "median" | "Median" => Ok(Median),
68 "sd" | "SD" | "stdev" => Ok(SD),
69 _ => match f64::from_str(s) {
70 Ok(x) => {
71 if x >= 0. && x <= 1.00000000001 {
72 let i = (TILE_COUNT as f64 * x + 0.5).floor() as u32;
73 Ok(Tile(i))
74 } else {
75 bail!(
76 "expecting one of n|sum|average|median|sd or a floating \
77 point number between 0 and 1, got: {x}"
78 )
79 }
80 }
81 Err(e) => bail!(
82 "expecting one of n|sum|average|median|sd or a floating \
83 point number between 0 and 1, floating point parse error for {s:?}: {e:#}"
84 ),
85 },
86 }
87 }
88}
89
90pub trait ToStatsString {
93 const UNIT_SHORT: &str;
94
95 fn to_stats_string(&self) -> String;
96}
97
98impl ToStatsString for MicroTime {
99 const UNIT_SHORT: &str = "ms";
100
101 fn to_stats_string(&self) -> String {
102 self.to_string_ms()
103 }
104}
105
106impl ToStatsString for NanoTime {
107 const UNIT_SHORT: &str = "ms";
108
109 fn to_stats_string(&self) -> String {
110 self.to_string_ms()
111 }
112}
113
114impl ToStatsString for u64 {
116 const UNIT_SHORT: &str = "count";
117
118 fn to_stats_string(&self) -> String {
119 self.to_string()
120 }
121}
122
123#[derive(Debug)]
128pub struct Stats<ViewType, const TILE_COUNT: usize> {
129 view_type: PhantomData<fn() -> ViewType>,
130 pub num_values: usize,
131 pub sum: u128,
132 pub average: FixedU128<U32>,
133 pub median: u64,
135 pub variance: FixedU128<U32>,
137 pub tiles: Vec<u64>,
144}
145
146#[derive(thiserror::Error, Debug)]
147pub enum StatsError {
148 #[error("no inputs given")]
149 NoInputs,
150 #[error("u128 saturated -- u64::MAX values summing up to near u128::MAX")]
151 SaturatedU128,
152 #[error("the virtual count (a u64) does not fit the usize range on this machine")]
153 VirtualCountDoesNotFitUSize,
154 #[error("the virtual sum does not fit the U96 range")]
155 VirtualSumDoesNotFitU96,
156}
157
158impl<ViewType, const TILE_COUNT: usize> Stats<ViewType, TILE_COUNT> {
159 pub fn average_u64(&self) -> u64 {
161 u64_from_fixed(self.average).expect("average always fits its original number range")
162 }
163
164 pub fn variance_f64(&self) -> f64 {
165 f64_from_fixed(self.variance)
166 }
167
168 pub fn standard_deviation_u64(&self) -> u64 {
171 (self.variance_f64().sqrt() + 0.5) as u64
174 }
175
176 #[inline]
179 pub fn get(&self, field: StatsField<TILE_COUNT>) -> u64 {
180 match field {
181 StatsField::N => self
182 .num_values
183 .try_into()
184 .expect("hopefully in range -- realistically it should be"),
185 StatsField::Sum => self
186 .sum
187 .try_into()
188 .expect("hopefully in range -- realistically it should be"),
189 StatsField::Average => self.average_u64(),
190 StatsField::Median => self.median,
191 StatsField::SD => self.standard_deviation_u64(),
192 StatsField::Tile(i) => {
193 self.tiles[usize::try_from(i).expect("u32 always works as index")]
194 }
195 }
196 }
197
198 pub fn column_of_field(field: StatsField<TILE_COUNT>) -> usize {
200 match field {
201 StatsField::N => 0,
202 StatsField::Sum => 1,
203 StatsField::Average => 2,
204 StatsField::Median => 3,
205 StatsField::SD => 4,
206 StatsField::Tile(i) => 6 + usize::try_from(i).expect("u32 always works as index"),
207 }
208 }
209
210 pub fn field_type_is_count(field: StatsField<TILE_COUNT>) -> bool {
213 match field {
214 StatsField::N => true,
215 StatsField::Sum => false,
216 StatsField::Average => false,
217 StatsField::Median => false,
218 StatsField::SD => false,
219 StatsField::Tile(_) => false,
220 }
221 }
222
223 pub fn from_values_from_field(
226 field: StatsField<TILE_COUNT>,
227 vals: Vec<WeightedValue>,
228 ) -> Result<SubStats<ViewType, TILE_COUNT>, StatsError> {
229 if Self::field_type_is_count(field) {
230 Ok(SubStats::Count(Stats::from_values(vals)?))
231 } else {
232 Ok(SubStats::ViewType(Stats::from_values(vals)?))
233 }
234 }
235
236 pub fn from_values(mut vals: Vec<WeightedValue>) -> Result<Self, StatsError> {
240 {
241 let num_weights = vals.len();
242 if num_weights.is_zero() {
243 return Err(StatsError::NoInputs);
244 }
245 }
246
247 let (virtual_count, virtual_sum): (u64, u128) =
248 vals.iter()
249 .fold((0, 0), |(count, sum), WeightedValue { value, weight }| {
250 let weight = u64::from(u32::from(*weight));
251 (
252 count + weight,
253 sum + u128::from(*value) * u128::from(weight),
254 )
255 });
256
257 let virtual_count_fixed = FixedU128::<U32>::from_num(virtual_count);
258
259 let average: FixedU128<U32> = {
265 let virtual_sum = FixedU128::<U32>::checked_from_num(virtual_sum)
269 .ok_or_else(|| StatsError::VirtualSumDoesNotFitU96)?;
270
271 virtual_sum / virtual_count_fixed
272 };
273
274 let variance = {
275 let sum_squared_error: FixedU128<U32> = vals
276 .iter()
277 .map(|WeightedValue { value, weight }| {
278 let value = FixedU128::<U32>::from_num(*value);
280 let weight = FixedU128::<U32>::from_num(u32::from(*weight));
281
282 square(absdiff(value, average)) * weight
283 })
284 .sum();
285 sum_squared_error / virtual_count_fixed
286 };
287
288 let indexed_vals = IndexedNumbers::from_unsorted_weighted_value_vec(&mut vals)
289 .expect("virtual_count is limited to u64 range above");
290 drop(vals);
292 assert_eq!(indexed_vals.virtual_len(), virtual_count);
293
294 let median = {
297 let mid = indexed_vals.virtual_len() / 2;
298 if 0 == indexed_vals.virtual_len() % 2 {
299 (indexed_vals[mid - 1], indexed_vals[mid]).average()
302 } else {
303 indexed_vals[mid]
304 }
305 };
306
307 let vals_distances = (virtual_count - 1) as f64;
317 let tiles_distances = (TILE_COUNT - 1) as f64;
318 let mut tiles = Vec::new();
319 for i in 0..TILE_COUNT {
320 let index = i as f64 / tiles_distances * vals_distances + 0.5;
321 let val = indexed_vals[index as u64];
322 tiles.push(val);
323 }
324
325 assert_eq!(indexed_vals.first(), tiles.first());
326 assert_eq!(indexed_vals.last(), tiles.last());
327
328 Ok(Stats {
329 view_type: PhantomData::default(),
330 num_values: usize::try_from(virtual_count)
331 .map_err(|_| StatsError::VirtualCountDoesNotFitUSize)?,
332 sum: virtual_sum,
333 average,
334 median,
335 variance,
336 tiles,
337 })
338 }
339}
340
341pub enum SubStats<ViewType, const TILE_COUNT: usize> {
347 Count(Stats<u64, TILE_COUNT>),
348 ViewType(Stats<ViewType, TILE_COUNT>),
349}
350
351#[derive(Debug, Clone, Copy)]
352pub enum SubStatsKind {
353 Count,
354 ViewType,
355}
356
357impl<ViewType: From<u64> + ToStatsString, const TILE_COUNT: usize> TableViewRow<SubStatsKind>
358 for SubStats<ViewType, TILE_COUNT>
359{
360 fn table_view_header(
361 ctx: SubStatsKind,
362 ) -> Box<dyn AsRef<[(Cow<'static, str>, Unit, ColumnFormatting)]>> {
363 eprintln!("XX weirdly this is never used??");
364 match ctx {
365 SubStatsKind::Count => Stats::<u64, TILE_COUNT>::table_view_header(()),
366 SubStatsKind::ViewType => Stats::<ViewType, TILE_COUNT>::table_view_header(()),
367 }
368 }
369
370 fn table_view_row(&self, out: &mut Vec<(Cow<str>, Highlight)>) {
371 match self {
372 SubStats::Count(stats) => stats.table_view_row(out),
373 SubStats::ViewType(stats) => stats.table_view_row(out),
374 }
375 }
376}
377
378impl<ViewType: From<u64> + ToStatsString, const TILE_COUNT: usize> TableViewRow<()>
379 for Stats<ViewType, TILE_COUNT>
380{
381 fn table_view_header(_: ()) -> Box<dyn AsRef<[(Cow<'static, str>, Unit, ColumnFormatting)]>> {
382 let mut cols = vec![
383 ("n".into(), Unit::Count, ColumnFormatting::Number),
384 (
385 "sum".into(),
386 Unit::ViewType(ViewType::UNIT_SHORT),
387 ColumnFormatting::Number,
388 ),
389 (
390 "avg".into(),
391 Unit::ViewType(ViewType::UNIT_SHORT),
392 ColumnFormatting::Number,
393 ),
394 (
395 "median".into(),
396 Unit::ViewType(ViewType::UNIT_SHORT),
397 ColumnFormatting::Number,
398 ),
399 (
400 "SD".into(),
401 Unit::ViewType(ViewType::UNIT_SHORT),
402 ColumnFormatting::Number,
403 ),
404 ("".into(), Unit::None, ColumnFormatting::Spacer),
405 ];
406
407 for i in 0..TILE_COUNT {
408 cols.push((
409 format!("{:.2}", (i as f64) / ((TILE_COUNT - 1) as f64)).into(),
410 Unit::ViewType(ViewType::UNIT_SHORT),
411 ColumnFormatting::Number,
412 ));
413 }
414 Box::new(cols)
415 }
416
417 fn table_view_row(&self, out: &mut Vec<(Cow<str>, Highlight)>) {
418 let Self {
419 view_type: _,
420 num_values,
421 sum,
422 average: _,
424 variance: _,
426 median,
427 tiles,
428 } = self;
429
430 out.push((num_values.to_string().into(), Highlight::Neutral));
431 out.push((
432 ViewType::from(u64::try_from(*sum).expect("sum must fit in u64 range"))
433 .to_stats_string()
434 .into(),
435 Highlight::Neutral,
436 ));
437 out.push((
438 ViewType::from(self.average_u64()).to_stats_string().into(),
439 Highlight::Neutral,
440 ));
441 out.push((
442 ViewType::from(*median).to_stats_string().into(),
443 Highlight::Neutral,
444 ));
445 out.push((
446 ViewType::from(self.standard_deviation_u64())
449 .to_stats_string()
450 .into(),
451 Highlight::Neutral,
452 ));
453
454 out.push(("".into(), Highlight::Spacer));
455
456 for val in tiles {
457 out.push((
458 ViewType::from(*val).to_stats_string().into(),
459 Highlight::Neutral,
460 ));
461 }
462 }
463}
464
465#[cfg(test)]
466mod tests {
467 use anyhow::Result;
468
469 use crate::stats_tables::stats::weighted::WEIGHT_ONE;
470
471 use super::*;
472
473 fn weighted(vals: &[u64]) -> Vec<WeightedValue> {
474 vals.iter()
475 .copied()
476 .map(|value| WeightedValue {
477 value,
478 weight: WEIGHT_ONE,
479 })
480 .collect()
481 }
482
483 #[test]
484 fn t_average_and_tiles_and_median() -> Result<()> {
485 let data = weighted(&[23, 4, 8, 30, 7]);
486 let stats = Stats::<u64, 4>::from_values(data)?;
487 assert_eq!(stats.average_u64(), 14); assert_eq!((stats.average * 10).round(), 144); assert_eq!(stats.tiles, [4, 7, 23, 30]); assert_eq!(stats.median, 8);
491 assert_eq!((stats.variance * 100).round(), 10424); assert_eq!(stats.standard_deviation_u64(), 10); let data = weighted(&[23, 4, 8, 31, 7]);
495 let stats = Stats::<u64, 4>::from_values(data)?;
496 assert_eq!(stats.average_u64(), 15); assert_eq!((stats.average * 10).round(), 146); assert_eq!(stats.tiles[0], 4);
499 assert_eq!(stats.tiles.len(), 4);
500 assert_eq!(stats.tiles[3], 31);
501 assert_eq!(stats.tiles, [4, 7, 23, 31]); assert_eq!(stats.median, 8);
503 assert_eq!((stats.variance * 100).round(), 11064); assert_eq!(stats.standard_deviation_u64(), 11); let data = weighted(&[23, 4, 8, 7]);
507 let stats = Stats::<u64, 4>::from_values(data)?;
508 assert_eq!(stats.average_u64(), 11); assert_eq!(stats.average, 10.5);
510 assert_eq!(stats.tiles, [4, 7, 8, 23]);
511 assert_eq!(stats.median, 8); let data = weighted(&[23, 8, 7]);
514 let stats = Stats::<u64, 4>::from_values(data)?;
515 assert_eq!(stats.average_u64(), 13); assert_eq!(stats.average.to_num::<f64>(), 12.666666666511446); assert_eq!(stats.median, 8);
518 assert_eq!(stats.tiles, [7, 8, 8, 23]);
519
520 Ok(())
521 }
522
523 #[test]
524 fn t_median() -> Result<()> {
525 let data = weighted(&[23, 4, 8, 7]);
526 let stats = Stats::<u64, 4>::from_values(data)?;
527 assert_eq!(stats.median, 8); let data = weighted(&[23, 4, 9, 7]);
530 let stats = Stats::<u64, 4>::from_values(data)?;
531 assert_eq!(stats.median, 8); let data = weighted(&[23, 4, 10, 7]);
534 let stats = Stats::<u64, 4>::from_values(data)?;
535 assert_eq!(stats.median, 9); let data = weighted(&[23, 4, 10, 7]);
538 let stats = Stats::<u64, 2>::from_values(data)?;
539 assert_eq!(stats.tiles, [4, 23]);
540 assert_eq!(stats.median, 9); let data = weighted(&[23, 4, 7]);
544 let stats = Stats::<u64, 2>::from_values(data)?;
545 assert_eq!(stats.median, 7);
546
547 let data = weighted(&[23, 4, 7]);
548 let stats = Stats::<u64, 3>::from_values(data)?;
549 assert_eq!(stats.median, 7);
550
551 let data = weighted(&[23, 4, 7]);
552 let stats = Stats::<u64, 4>::from_values(data)?;
553 assert_eq!(stats.median, 7);
554
555 let data = weighted(&[23, 4]);
556 let stats = Stats::<u64, 4>::from_values(data)?;
557 assert_eq!(stats.median, 14); Ok(())
560 }
561
562 #[test]
563 fn t_weights() -> Result<()> {
564 let data = weighted(&[23, 4, 9, 4, 4, 7]); let stats = Stats::<u64, 4>::from_values(data)?;
566 assert_eq!(stats.average, 8.5);
567 assert_eq!(stats.median, 6); assert_eq!(stats.variance.to_num::<f64>(), 45.58333333325572); let mut data = weighted(&[23, 4, 9, 7]);
571 data.push(WeightedValue {
572 value: 4,
573 weight: 2.try_into().unwrap(),
574 });
575 let stats = Stats::<u64, 4>::from_values(data)?;
576 assert_eq!(stats.median, 6); assert_eq!(stats.variance.to_num::<f64>(), 45.58333333325572); Ok(())
580 }
581}