chj_rustbin/text/
parseutil.rs1use anyhow::{anyhow, bail, Result};
2
/// Returns `true` when every character of `s` satisfies `pred`.
///
/// An empty string trivially satisfies the condition. Checking stops at the
/// first character for which `pred` returns `false`.
pub fn is_all(s: &str, pred: impl Fn(char) -> bool) -> bool {
    for ch in s.chars() {
        if !pred(ch) {
            return false;
        }
    }
    true
}
6
/// Returns `true` when `s` consists solely of ASCII whitespace characters
/// (which includes the case of `s` being empty).
pub fn is_all_white(s: &str) -> bool {
    s.chars().all(|ch| ch.is_ascii_whitespace())
}
10
/// Splits `s` at its first `':'` into the part before and the part after
/// the colon (the colon itself is dropped).
///
/// Returns `None` when `s` contains no `':'`. Neither part is trimmed.
pub fn key_val(s: &str) -> Option<(&str, &str)> {
    s.split_once(':')
}
19
/// Splits `s` into its first character and the remainder following it.
///
/// Returns `None` when `s` is empty.
pub fn first_rest(s: &str) -> Option<(char, &str)> {
    let mut chars = s.chars();
    let first = chars.next()?;
    // After consuming one character the iterator views exactly the remainder.
    Some((first, chars.as_str()))
}
24
/// Strips leading ASCII whitespace from `s`.
///
/// The result is always a suffix of `s`: the whole of `s` when it does not
/// start with whitespace, and the empty suffix when `s` is all whitespace.
pub fn drop_white(s: &str) -> &str {
    s.trim_start_matches(|ch: char| ch.is_ascii_whitespace())
}
36
/// Strips trailing ASCII whitespace from `s`.
///
/// Only ASCII whitespace is removed; other Unicode whitespace (for example
/// a no-break space) is kept. A string made up entirely of ASCII whitespace
/// yields the empty string.
pub fn drop_white_end(s: &str) -> &str {
    s.trim_end_matches(|ch: char| ch.is_ascii_whitespace())
}
46
47pub fn after_white(s: &str) -> Option<&str> {
48 let s2 = drop_white(s);
49 if std::ptr::eq(s, s2) {
50 None
51 } else {
52 Some(s2)
53 }
54}
55
/// Strips ASCII whitespace from both ends of `s`; whitespace in the
/// interior of the string is left alone.
pub fn cleanwhite(s: &str) -> &str {
    s.trim_matches(|ch: char| ch.is_ascii_whitespace())
}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies `f` to every input of `cases` and compares the result with
    /// the expected output paired with it.
    fn check_all(f: fn(&str) -> &str, cases: &[(&str, &str)]) {
        for &(inp, outp) in cases {
            assert_eq!(f(inp), outp)
        }
    }

    #[test]
    fn t_drop_white() {
        check_all(
            drop_white,
            &[
                ("foo", "foo"),
                (" foo", "foo"),
                ("foo ", "foo "),
                (" foo ", "foo "),
                (" f oo ", "f oo "),
                (" ", ""),
            ],
        );
    }

    #[test]
    fn t_drop_white_end() {
        check_all(
            drop_white_end,
            &[
                ("foo", "foo"),
                (" foo", " foo"),
                ("foo ", "foo"),
                (" foo ", " foo"),
                (" f oo ", " f oo"),
                (" ", ""),
            ],
        );
    }

    #[test]
    fn t_cleanwhite() {
        check_all(
            cleanwhite,
            &[
                ("foo", "foo"),
                (" foo", "foo"),
                ("foo ", "foo"),
                (" foo ", "foo"),
                (" f oo ", "f oo"),
                (" ", ""),
            ],
        );
    }
}
104
/// Splits `s` into the longest prefix whose characters all satisfy `pred`
/// and the remainder starting at the first character that does not.
///
/// Returns `(s, "")` when every character matches (including when `s` is
/// empty), and `("", s)` when the first character already fails `pred`.
pub fn take_while(s: &str, pred: impl Fn(char) -> bool) -> (&str, &str) {
    // `find` reports a *byte* offset, which is what slicing needs; counting
    // characters instead would mis-split (or panic on) multi-byte input.
    let split = s.find(|c: char| !pred(c)).unwrap_or(s.len());
    s.split_at(split)
}
114
115pub fn parse_hex_digit(c: char) -> Result<u32> {
116 let n = c as u32;
117 if (n >= '0' as u32) && (n <= '9' as u32) {
118 Ok(n - ('0' as u32))
119 } else if (n >= 'a' as u32) && (n <= 'f' as u32) {
120 Ok(n - ('a' as u32) + 10)
121 } else if (n >= 'A' as u32) && (n <= 'F' as u32) {
122 Ok(n - ('A' as u32) + 10)
123 } else {
124 bail!("invalid hex digit {c:?}")
125 }
126}
127
128pub fn next_hex_digit<I>(cs: &mut I) -> Result<u32>
129where
130 I: Iterator<Item = char>,
131{
132 parse_hex_digit(cs.next().ok_or_else(|| anyhow!("hex string too short"))?)
133}
134
135pub fn parse_hex<const N: usize>(s: &str) -> Result<[u8; N]> {
136 let mut r = [0; N];
137 let mut cs = s.chars();
138 for i in 0..N {
139 let a = next_hex_digit(&mut cs)?;
140 let b = next_hex_digit(&mut cs)?;
141 r[i] = (a * 16 + b) as u8;
142 }
143 Ok(r)
144}
145
/// Returns `true` for the ASCII whitespace characters: space, horizontal
/// tab, line feed, form feed and carriage return.
///
/// Vertical tab and non-ASCII whitespace do not count.
pub fn char_is_white(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
149
150pub fn drop_n(s: &str, n: usize, f: impl Fn(char) -> bool) -> Result<&str> {
151 let mut p = s;
152 let mut i = 0;
153 while i < n {
154 if let Some((c, r)) = first_rest(p) {
155 if !f(c) {
156 bail!(
157 "drop_n: non-matching character after {i} \
158 instead of {n} characters"
159 )
160 }
161 p = r;
162 } else {
163 bail!("drop_n: end of string after {i} instead of {n} characters")
164 }
165 i += 1;
166 }
167 Ok(&s[n..])
168}
169
170pub fn parse_byte_multiplier(s: &str) -> Result<u64> {
171 if s == "B" {
172 Ok(1)
173 } else if s == "KiB" {
174 Ok(1024)
175 } else if s == "MiB" {
176 Ok(1024 * 1024)
177 } else if s == "GiB" {
178 Ok(1024 * 1024 * 1024)
179 } else if s == "TiB" {
180 Ok(1024 * 1024 * 1024 * 1024)
181 } else if s == "PiB" {
182 Ok(1024 * 1024 * 1024 * 1024 * 1024)
183 } else {
184 bail!("unknown multiplier {s:?}")
185 }
186}