chj_rustbin/text/
parseutil.rs

1use anyhow::{anyhow, bail, Result};
2
/// Returns `true` when `pred` holds for every character of `s`.
///
/// An empty string yields `true`, since there is no character that could
/// fail the predicate.
pub fn is_all(s: &str, pred: impl Fn(char) -> bool) -> bool {
    for ch in s.chars() {
        if !pred(ch) {
            // First offending character decides the result.
            return false;
        }
    }
    true
}
6
/// Returns `true` when `s` consists solely of ASCII whitespace characters
/// (which includes the empty string).
pub fn is_all_white(s: &str) -> bool {
    s.chars().all(|ch| ch.is_ascii_whitespace())
}
10
11// pub fn is_all_alphanum(s: &str) -> bool {
12//     is_all(s, |c| c.is_alphanumeric())
13// }
14
/// Splits `s` at its first `':'` into `(key, value)`.
///
/// The colon itself belongs to neither part; any further colons stay in the
/// value. Returns `None` when `s` contains no colon.
pub fn key_val(s: &str) -> Option<(&str, &str)> {
    s.split_once(':')
}
19
/// Splits `s` into its first character and the remainder after it.
///
/// Returns `None` for the empty string.
pub fn first_rest(s: &str) -> Option<(char, &str)> {
    let mut chars = s.chars();
    let head = chars.next()?;
    // After consuming one character the iterator views exactly the tail.
    Some((head, chars.as_str()))
}
24
/// Strips leading ASCII whitespace from `s`.
///
/// The result is always a suffix slice of `s`; when nothing is stripped it is
/// `s` itself.
pub fn drop_white(s: &str) -> &str {
    s.trim_start_matches(|ch: char| ch.is_ascii_whitespace())
}
36
/// Strips trailing ASCII whitespace from `s`.
///
/// Returns the empty string when `s` is empty or consists only of ASCII
/// whitespace.
pub fn drop_white_end(s: &str) -> &str {
    // ASCII whitespace is always a single byte and never occurs inside a
    // multi-byte UTF-8 sequence, so scanning bytes from the back is safe and
    // the cut always lands on a character boundary.
    match s.bytes().rposition(|b| !b.is_ascii_whitespace()) {
        Some(last) => &s[..=last],
        None => "",
    }
}
46
47pub fn after_white(s: &str) -> Option<&str> {
48    let s2 = drop_white(s);
49    if std::ptr::eq(s, s2) {
50        None
51    } else {
52        Some(s2)
53    }
54}
55
/// Strips ASCII whitespace from both ends of `s`.
pub fn cleanwhite(s: &str) -> &str {
    // `str::trim` would also remove non-ASCII (Unicode) whitespace, so use
    // the predicate form to restrict trimming to ASCII whitespace.
    s.trim_matches(|ch: char| ch.is_ascii_whitespace())
}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies `f` to every input and compares the result against the
    /// expected output paired with it.
    fn check(f: fn(&str) -> &str, cases: &[(&str, &str)]) {
        for &(inp, outp) in cases {
            assert_eq!(f(inp), outp);
        }
    }

    // Leading whitespace is removed, trailing whitespace is kept.
    #[test]
    fn t_drop_white() {
        check(
            drop_white,
            &[
                ("foo", "foo"),
                ("  foo", "foo"),
                ("foo  ", "foo  "),
                (" foo  ", "foo  "),
                (" f oo  ", "f oo  "),
                ("  ", ""),
            ],
        );
    }

    // Trailing whitespace is removed, leading whitespace is kept.
    #[test]
    fn t_drop_white_end() {
        check(
            drop_white_end,
            &[
                ("foo", "foo"),
                ("  foo", "  foo"),
                ("foo  ", "foo"),
                (" foo  ", " foo"),
                (" f oo  ", " f oo"),
                ("  ", ""),
            ],
        );
    }

    // Whitespace is removed at both ends, inner whitespace is kept.
    #[test]
    fn t_cleanwhite() {
        check(
            cleanwhite,
            &[
                ("foo", "foo"),
                ("  foo", "foo"),
                ("foo  ", "foo"),
                (" foo  ", "foo"),
                (" f oo  ", "f oo"),
                ("  ", ""),
            ],
        );
    }
}
104
/// Splits `s` into the longest prefix whose characters all satisfy `pred`
/// and the remainder starting at the first character that does not.
///
/// When every character satisfies `pred` (including for the empty string),
/// the result is `(s, "")`.
pub fn take_while(s: &str, pred: impl Fn(char) -> bool) -> (&str, &str) {
    // `find` reports a byte offset, which is what slicing requires; a
    // character count would split at the wrong place (or panic) as soon as
    // the prefix contains a multi-byte character.
    match s.find(|c: char| !pred(c)) {
        Some(i) => s.split_at(i),
        None => (s, ""),
    }
}
114
115pub fn parse_hex_digit(c: char) -> Result<u32> {
116    let n = c as u32;
117    if (n >= '0' as u32) && (n <= '9' as u32) {
118        Ok(n - ('0' as u32))
119    } else if (n >= 'a' as u32) && (n <= 'f' as u32) {
120        Ok(n - ('a' as u32) + 10)
121    } else if (n >= 'A' as u32) && (n <= 'F' as u32) {
122        Ok(n - ('A' as u32) + 10)
123    } else {
124        bail!("invalid hex digit {c:?}")
125    }
126}
127
128pub fn next_hex_digit<I>(cs: &mut I) -> Result<u32>
129where
130    I: Iterator<Item = char>,
131{
132    parse_hex_digit(cs.next().ok_or_else(|| anyhow!("hex string too short"))?)
133}
134
135pub fn parse_hex<const N: usize>(s: &str) -> Result<[u8; N]> {
136    let mut r = [0; N];
137    let mut cs = s.chars();
138    for i in 0..N {
139        let a = next_hex_digit(&mut cs)?;
140        let b = next_hex_digit(&mut cs)?;
141        r[i] = (a * 16 + b) as u8;
142    }
143    Ok(r)
144}
145
/// Returns `true` when `c` is an ASCII whitespace character: space, tab,
/// line feed, form feed or carriage return.
pub fn char_is_white(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
149
150pub fn drop_n(s: &str, n: usize, f: impl Fn(char) -> bool) -> Result<&str> {
151    let mut p = s;
152    let mut i = 0;
153    while i < n {
154        if let Some((c, r)) = first_rest(p) {
155            if !f(c) {
156                bail!(
157                    "drop_n: non-matching character after {i} \
158                       instead of {n} characters"
159                )
160            }
161            p = r;
162        } else {
163            bail!("drop_n: end of string after {i} instead of {n} characters")
164        }
165        i += 1;
166    }
167    Ok(&s[n..])
168}
169
170pub fn parse_byte_multiplier(s: &str) -> Result<u64> {
171    if s == "B" {
172        Ok(1)
173    } else if s == "KiB" {
174        Ok(1024)
175    } else if s == "MiB" {
176        Ok(1024 * 1024)
177    } else if s == "GiB" {
178        Ok(1024 * 1024 * 1024)
179    } else if s == "TiB" {
180        Ok(1024 * 1024 * 1024 * 1024)
181    } else if s == "PiB" {
182        Ok(1024 * 1024 * 1024 * 1024 * 1024)
183    } else {
184        bail!("unknown multiplier {s:?}")
185    }
186}