ahtml/
allocator.rs

1use std::{
2    cell::RefCell,
3    cmp::max,
4    collections::HashSet,
5    fmt::Display,
6    marker::PhantomData,
7    mem::ManuallyDrop,
8    ops::Deref,
9    panic::RefUnwindSafe,
10    sync::{atomic::AtomicBool, Arc, Mutex},
11};
12
13use ahtml_html::meta::{ElementMeta, MetaDb};
14use anyhow::{anyhow, bail, Result};
15use backtrace::Backtrace;
16use chj_util::{partialbacktrace::PartialBacktrace, warn};
17use kstring::KString;
18use lazy_static::lazy_static;
19
20use crate::{more_vec::MoreVec, myfrom::MyFrom, stillvec::StillVec};
21
22pub type Context = Arc<dyn Display + Sync + Send + RefUnwindSafe>;
23
24// once again
/// Returns `true` when `s` is empty or consists solely of ASCII
/// whitespace characters.
fn all_whitespace(s: &str) -> bool {
    // ASCII whitespace is always a single byte in UTF-8, and every byte
    // of a multi-byte character is >= 0x80, so scanning bytes gives the
    // same answer as scanning chars.
    !s.bytes().any(|b| !b.is_ascii_whitespace())
}
28
/// The kind of value an allocation refers to, as reported by
/// `AllocatorType::allockind`.
#[derive(Debug)]
pub enum AllocKind {
    /// An attribute, i.e. a `(KString, KString)` pair.
    Att,
    /// A `Node`.
    Node,
    /// An `AId` (regardless of what the id points to).
    Id,
}
35
/// Implemented by the value types that `AId`s and `AVec`s can refer to.
pub trait AllocatorType {
    /// The `AllocKind` that corresponds to the implementing type.
    fn allockind() -> AllocKind;
}
39
/// Attributes are stored as `(name, value)` pairs.
impl AllocatorType for (KString, KString) {
    fn allockind() -> AllocKind {
        AllocKind::Att
    }
}

/// Nodes (elements, text, preserialized fragments, empty nodes).
impl AllocatorType for Node {
    fn allockind() -> AllocKind {
        AllocKind::Node
    }
}

/// Ids themselves; the kind does not depend on the id's target type `T`.
impl<T> AllocatorType for AId<T> {
    fn allockind() -> AllocKind {
        AllocKind::Id
    }
}
// ^ Again, this is not specific to the id's target type. Or should
// there be separate `AllocKind` variants per target type?
59
/// A pool of reusable `HtmlAllocator`s. Allocators are checked out via
/// `get` and handed back when the returned `HtmlAllocatorGuard` is
/// dropped.
pub struct HtmlAllocatorPool {
    // Upper bound (compared against the allocator's generation counter
    // when a guard is dropped) on how often an allocator is put back
    // into the pool.
    allocator_max_use_count: u16,
    max_allocations: u32,            // See HtmlAllocator
    metadb: Option<&'static MetaDb>, // See HtmlAllocator
    /// Information about the pool, e.g. where it was created or what
    /// document it is used for.
    context: Context,
    // Allocators that are currently not checked out.
    allocators: Mutex<Vec<HtmlAllocator>>,
}
69
70impl HtmlAllocatorPool {
71    /// `allocator_max_use_count` is the number of times an
72    /// HtmlAllocator should be re-used. For the other arguments, see
73    /// `HtmlAllocator::new_with_metadb`.
74    pub fn new_with_metadb(
75        allocator_max_use_count: u16,
76        max_allocations: u32,
77        metadb: Option<&'static MetaDb>,
78        context: Context,
79    ) -> Self {
80        HtmlAllocatorPool {
81            allocator_max_use_count,
82            max_allocations,
83            metadb,
84            context,
85            allocators: Mutex::new(Vec::new()),
86        }
87    }
88    pub fn get<'p>(&'p self) -> HtmlAllocatorGuard<'p> {
89        let mut l = self.allocators.lock().unwrap();
90        let a = l.pop().unwrap_or_else(|| {
91            HtmlAllocator::new_with_metadb(
92                self.max_allocations,
93                self.metadb.clone(),
94                self.context.clone(),
95            )
96        });
97        HtmlAllocatorGuard {
98            pool: self,
99            html_allocator: ManuallyDrop::new(a),
100        }
101    }
102}
103
/// An `HtmlAllocator` checked out from a `HtmlAllocatorPool`. Derefs
/// to the allocator; on drop the allocator is cleared and put back
/// into the pool, unless it has reached the pool's use-count limit.
pub struct HtmlAllocatorGuard<'p> {
    // The pool the allocator is returned to.
    pool: &'p HtmlAllocatorPool,
    // `ManuallyDrop` so that `Drop::drop` can move the allocator out.
    html_allocator: ManuallyDrop<HtmlAllocator>,
}
108
/// Gives shared access to the checked-out allocator.
impl<'p> Deref for HtmlAllocatorGuard<'p> {
    type Target = HtmlAllocator;

    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        // Deref coercion goes through `ManuallyDrop` to the allocator.
        &self.html_allocator
    }
}
117
118impl<'p> Drop for HtmlAllocatorGuard<'p> {
119    fn drop(&mut self) {
120        let mut a = unsafe { ManuallyDrop::take(&mut self.html_allocator) };
121        if a.regionid.generation < self.pool.allocator_max_use_count {
122            a.clear();
123            // Insert it back into the pool:
124            let mut l = self.pool.allocators.lock().unwrap();
125            l.push(a);
126        }
127    }
128}
129
// NOTE(review): no safety argument is recorded for this impl. It asserts
// that moving a guard (and the `HtmlAllocator` it owns) to another
// thread is sound -- TODO confirm against the fields of `HtmlAllocator`
// (`StillVec` in particular) and document the reasoning here.
unsafe impl<'p> Send for HtmlAllocatorGuard<'p> {}
131
/// Storage for the nodes and attributes of HTML documents. Values are
/// pushed into pre-sized storage and referred to by `AId`s; `clear`
/// empties the storage and starts a new generation.
pub struct HtmlAllocator {
    // Included in out-of-memory error messages.
    context: Context,
    // For dynamic verification of AId:s, also the generation counter
    // is used to stop reusing the allocator at some point to free up
    // unused memory.
    regionid: RegionId,
    // If present, DOM structure validation is done (at runtime):
    metadb: Option<&'static MetaDb>,
    // The top capacity value, as passed by the user
    max_allocations: usize,
    // Storage for attributes:
    atts: StillVec<Option<(KString, KString)>>,
    // Storage for nodes:
    nodes: StillVec<Option<Node>>,
    // Storage for references to attributes and nodes:
    ids: RefCell<Vec<u32>>, // for attribute or Node, depending on slot
    // Temporary storage for serialisation:
    pub(crate) html_escape_tmp: RefCell<Vec<u8>>,
}
151
// Counter behind `next_allocator_id`. A plain atomic: no lazy
// initialisation, no lock, and no mutex poisoning to worry about.
// NOTE(review): if nothing else in the file uses `lazy_static!`, its
// import is now unused.
static NEXT_ALLOCATOR_ID: std::sync::atomic::AtomicU16 = std::sync::atomic::AtomicU16::new(0);

/// Returns a fresh allocator id: 0 for the first call, then counting
/// up, wrapping around to 0 after `u16::MAX`.
fn next_allocator_id() -> u16 {
    // `fetch_add` returns the previous value and wraps on overflow.
    // `Relaxed` suffices since only the counter value itself matters.
    NEXT_ALLOCATOR_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}
162
/// Conversion of a value into an `ASlice<T>` stored in the given
/// allocator.
pub trait ToASlice<T> {
    /// Consumes `self` and returns the resulting slice, or an error
    /// (e.g. when the allocator cannot provide the needed storage).
    fn to_aslice(self, allocator: &HtmlAllocator) -> Result<ASlice<T>>;
}
166
/// When `true`, `HtmlAllocator::new_element` adds a `title` attribute
/// holding a partial backtrace of the creation site to every element
/// that does not already have a `title` attribute.
pub static AHTML_TRACE: AtomicBool = AtomicBool::new(false);
168
169impl HtmlAllocator {
170    /// `max_allocations`: how many node (text, elements, empty nodes)
171    /// and attribute allocations in total are allowed before out of
172    /// memory errors are being returned for allocations (i.e. when
173    /// creating new elements, attributes, or pushing to an
174    /// `AVec`). `metadb`: if given, HTML structure is verified during
175    /// element allocation.
176    pub fn new_with_metadb(
177        max_allocations: u32,
178        metadb: Option<&'static MetaDb>,
179        context: Context,
180    ) -> Self {
181        let max_allocations = max_allocations as usize;
182        let half_max_alloc = max_allocations / 2;
183        HtmlAllocator {
184            context,
185            regionid: RegionId {
186                allocator_id: next_allocator_id(),
187                generation: 0,
188            },
189            // Assume that attributes are relatively rare, even
190            // half_max_alloc seems overly many, well.
191            atts: StillVec::with_capacity(half_max_alloc),
192            // Even though ids <= nodes + atts, we don't know how the
193            // distribution between nodes and atts will be, so have to
194            // allocate nodes with (close to) the max, too.
195            nodes: StillVec::with_capacity(max_allocations),
196            ids: RefCell::new(Vec::with_capacity(max_allocations)),
197            metadb,
198            max_allocations,
199            html_escape_tmp: RefCell::new(Vec::new()),
200        }
201    }
202
    /// Empty all storage and start a new generation. The region id
    /// changes, so `AId`s handed out before the call are rejected
    /// afterwards (see `id_to_bare`).
    pub fn clear(&mut self) {
        self.atts.exclusive_clear();
        self.nodes.exclusive_clear();
        self.ids.borrow_mut().clear();
        // Maybe in the future want to let regions be reusable
        // forever. So, don't `+= 1`!
        self.regionid.generation = self.regionid.generation.wrapping_add(1);
    }
211
    /// Build the error returned when the storage named `which_vec`
    /// (with the given `capacity`) is full. The message also names the
    /// configured `max_allocations` and the allocator's context.
    fn out_of_memory_error(&self, which_vec: &str, capacity: usize) -> anyhow::Error {
        anyhow!(
            "HtmlAllocator: reached the capacity {capacity} of the {which_vec} region \
             due to the configured max_allocations limit of {} -- {}",
            self.max_allocations,
            self.context
        )
    }
220
    /// The current region id (allocator id plus generation) of this
    /// allocator.
    pub fn regionid(&self) -> RegionId {
        self.regionid
    }
224    pub fn assert_regionid(&self, rid: RegionId) {
225        if rid != self.regionid {
226            panic!("regionid mismatch")
227        }
228    }
229
    /// Like `id_to_bare`, but returns the bare id as a `usize` suitable
    /// for indexing. Panics if `id` is from a different region.
    fn id_to_index<T>(&self, id: AId<T>) -> usize {
        self.id_to_bare(id) as usize
    }
233
234    fn id_to_bare<T>(&self, id: AId<T>) -> u32 {
235        if self.regionid == id.regionid {
236            id.id
237        } else {
238            panic!(
239                "AId with incompatible RegionId used: expected {:?}, got {:?}",
240                self.regionid, id.regionid
241            );
242        }
243    }
244
    /// Store the bare value of `val` in slot `id_bare` of the ids table.
    ///
    /// Panics if `val` is from a different region, or if `id_bare` is
    /// beyond the current length of the ids table.
    // first AId is for our own position, second one for position in target T.
    // Now why is first one not maybe AId<AId<T>>?
    fn set_id<T: AllocatorType>(&self, id_bare: u32, val: AId<T>) {
        self.ids.borrow_mut()[id_bare as usize] = self.id_to_bare(val);
    }
250
251    pub fn get_node<'a>(&'a self, id: AId<Node>) -> Option<&'a Node> {
252        if let Some(v) = self.nodes.get(self.id_to_index(id)) {
253            if let Some(n) = v {
254                Some(n)
255            } else {
256                // "uninitialized" memory
257                None
258            }
259        } else {
260            // id behind end of memory
261            None
262        }
263    }
264
265    // COPY-PASTE of above
266    pub fn get_att<'a>(&'a self, id: AId<(KString, KString)>) -> Option<&'a (KString, KString)> {
267        if let Some(v) = self.atts.get(self.id_to_index(id)) {
268            if let Some(n) = v {
269                Some(n)
270            } else {
271                // "uninitialized" memory
272                None
273            }
274        } else {
275            // id behind end of memory
276            None
277        }
278    }
279
    /// Read slot `id_bare` of the ids table and return its content as
    /// an `AId<T>` for this allocator's current region. Returns `None`
    /// if `id_bare` is beyond the end of the ids table. The target
    /// type `T` is chosen by the caller and is not verified.
    // For within ids. To get the id, to be of type T.
    pub fn get_id<T: AllocatorType>(&self, id_bare: u32) -> Option<AId<T>> {
        // Blindly trusting that the id we are retrieving is pointing
        // to T (XX btw why using a mixed pool for ids, when using
        // separate ones for the objects?)
        self.ids
            .borrow()
            .get(id_bare as usize)
            .map(|id2| AId::new(self.regionid, *id2))
    }
290
    /// Create an empty `AVec` backed by this allocator. No storage is
    /// reserved until the first push.
    // it's actually a vec of AId, but for T
    pub fn new_vec<'a, T: AllocatorType>(&'a self) -> AVec<'a, T> {
        AVec::new(self)
    }

    /// Create an empty `AVec` with room for `capacity` ids reserved
    /// right away. Fails if the ids storage cannot hold that many
    /// additional entries.
    pub fn new_vec_with_capacity<'a, T: AllocatorType>(
        &'a self,
        capacity: u32,
    ) -> Result<AVec<'a, T>> {
        AVec::new_with_capacity(self, capacity)
    }
302
303    /// But also see element method for more comfort.
304    pub fn new_element(
305        &self,
306        meta: &'static ElementMeta,
307        // The slices must be for storage in this
308        // HtmlAllocator! XX could this be improved?
309        attr: ASlice<(KString, KString)>,
310        body: ASlice<Node>,
311    ) -> Result<AId<Node>> {
312        // verify
313        if let Some(global_meta) = self.metadb {
314            {
315                let allowed = &meta.attributes;
316                for (i, att) in attr.iter_att(self).enumerate() {
317                    if global_meta.global_attribute_names.contains(&att.0) {
318                        // OK; XX verify attribute value, too, but
319                        // don't have the data yet.
320                    } else if let Some(_a) = allowed.get(&att.0) {
321                        // OK; XX: todo: verify attribute value, too
322                    } else {
323                        let mut allowednamesset = allowed
324                            .keys()
325                            .map(|k| k.clone())
326                            .collect::<HashSet<KString>>();
327                        allowednamesset
328                            .extend(global_meta.global_attribute_names.iter().map(|k| k.clone()));
329                        let mut allowednames: Vec<&str> =
330                            allowednamesset.iter().map(|v| v.as_str()).collect();
331                        allowednames.sort();
332                        bail!(
333                            "invalid attribute #{i} {:?} for element {:?} \
334                               (valid: {:?})\n{:?}",
335                            att.0.as_str(),
336                            meta.tag_name.as_str(),
337                            allowednames,
338                            Backtrace::new()
339                        )
340                    }
341                }
342            }
343            {
344                let allowed = &meta.child_elements;
345                for (i, node) in body.iter_node(self).enumerate() {
346                    let verify_child_element_meta = |child_meta: &ElementMeta| -> Result<()> {
347                        if !allowed.contains(&child_meta.tag_name) {
348                            let mut allowednames: Vec<&str> =
349                                allowed.iter().map(|k| k.as_str()).collect();
350                            allowednames.sort();
351                            bail!(
352                                "content value #{i}: element {:?} not allowed as \
353                                   a child of element {:?}, only: {:?}{}\n{:?}",
354                                child_meta.tag_name.as_str(),
355                                meta.tag_name.as_str(),
356                                allowednames,
357                                if meta.allows_child_text {
358                                    " as well as text"
359                                } else {
360                                    " (no text)"
361                                },
362                                Backtrace::new()
363                            )
364                        }
365                        Ok(())
366                    };
367                    match &*node {
368                        Node::Element(elt) => verify_child_element_meta(elt.meta)?,
369                        Node::String(s) => {
370                            if (!meta.allows_child_text) && (!all_whitespace(s.as_str())) {
371                                let mut allowednames: Vec<&str> =
372                                    allowed.iter().map(|k| k.as_str()).collect();
373                                allowednames.sort();
374                                bail!(
375                                    "content value #{i}: text is not allowed as \
376                                       a child of element {:?}, only: {:?}\n{:?}",
377                                    meta.tag_name.as_str(),
378                                    allowednames,
379                                    Backtrace::new()
380                                )
381                            }
382                        }
383                        Node::Preserialized(ser) => verify_child_element_meta(ser.meta)?,
384                        Node::None => {}
385                    }
386                }
387            }
388        }
389
390        let mut attr = attr;
391        if AHTML_TRACE.load(std::sync::atomic::Ordering::Relaxed) {
392            let mut seen_title = false;
393            let mut vec = self.new_vec_with_capacity(attr.len + 1)?;
394            for id in attr.iter_aid(&self) {
395                let r = self.get_att(id).expect("exists because it's in attr");
396                if r.0 == "title" {
397                    seen_title = true;
398                }
399                vec.push(id)?;
400            }
401            let bt_str = PartialBacktrace::new().part_to_string(1, "src/rouille_runner.rs");
402            if seen_title {
403                warn!(
404                    "element {:?} already has 'title' attribute, not adding tracing at:\n\
405                       {bt_str}",
406                    &*meta.tag_name
407                );
408            } else {
409                vec.push(self.attribute(
410                    "title",
411                    format!(
412                        "Generated at:\n\
413                                                          {bt_str}"
414                    ),
415                )?)?;
416            }
417            attr = vec.to_aslice(self)?;
418        }
419
420        // (Note: now can get .len() even though that can update even
421        // though we don't have unique access to nodes here. Only
422        // through sequencing (this is not Sync) we know that it isn't
423        // other than through the `push_within_capacity_` call, which
424        // was in the same borrow scope before, "too".)
425        let id_ = self.nodes.len();
426        self.nodes
427            .push_within_capacity(Some(Node::Element(Element { meta, attr, body })))
428            .map_err(|_e| self.out_of_memory_error("nodes", self.nodes.capacity()))?;
429        Ok(AId::new(self.regionid, id_ as u32))
430    }
431
    /// Allocate the given `Element` value; equivalent to calling
    /// `new_element` with its `meta`, `attr` and `body` fields (so the
    /// same verification applies).
    // XX naming needs work (new_element, element, (add_element), allocate_element).
    pub fn allocate_element(&self, elt: Element) -> Result<AId<Node>> {
        self.new_element(elt.meta, elt.attr, elt.body)
    }
436
437    fn new_string(&self, s: KString) -> Result<AId<Node>> {
438        // much COPY-PASTE always
439        let id_ = self.nodes.len();
440        self.nodes
441            .push_within_capacity(Some(Node::String(s)))
442            .map_err(|_e| self.out_of_memory_error("nodes", self.nodes.capacity()))?;
443        Ok(AId::new(self.regionid, id_ as u32))
444    }
445    pub fn empty_node(&self) -> Result<AId<Node>> {
446        // much COPY-PASTE always
447        let id_ = self.nodes.len();
448        self.nodes
449            .push_within_capacity(Some(Node::None))
450            .map_err(|_e| self.out_of_memory_error("nodes", self.nodes.capacity()))?;
451        Ok(AId::new(self.regionid, id_ as u32))
452    }
453
454    pub fn new_attribute(&self, att: (KString, KString)) -> Result<AId<(KString, KString)>> {
455        let id_ = self.atts.len();
456        self.atts
457            .push_within_capacity(Some(att))
458            .map_err(|_e| self.out_of_memory_error("atts", self.atts.capacity()))?;
459        Ok(AId::new(self.regionid, id_ as u32))
460    }
461    pub fn attribute<K, V>(&self, key: K, val: V) -> Result<AId<(KString, KString)>>
462    where
463        KString: MyFrom<K>,
464        KString: MyFrom<V>,
465    {
466        self.new_attribute((KString::myfrom(key), KString::myfrom(val)))
467    }
468
469    pub fn preserialized(&self, val: SerHtmlFrag) -> Result<AId<Node>> {
470        // ever copy-paste
471        let id_ = self.nodes.len();
472        // /copy-paste
473        self.nodes
474            .push_within_capacity(Some(Node::Preserialized(val)))
475            .map_err(|_e| self.out_of_memory_error("nodes", self.nodes.capacity()))?;
476        // copy-paste
477        Ok(AId::new(self.regionid, id_ as u32))
478    }
479
480    // Allocate a range of `AId`s. We never need to allocate ranges of
481    // Node or attribute values, those are only pushed one by one--we
482    // need alloc for `AVec` only and those only store `AId`s.
483    // Giving a `copy_range` essentially makes this a "realloc".
484    fn alloc(&self, n: u32, copy_range: Option<(u32, u32)>) -> Result<u32> {
485        let mut v = self.ids.borrow_mut();
486        let id = v.len();
487        let newlen = id + n as usize;
488        if newlen > v.capacity() {
489            return Err(self.out_of_memory_error("ids", v.capacity()));
490        }
491
492        if let Some((start, end)) = copy_range {
493            let oldn = end - start;
494            assert!(oldn < n);
495            v.extend_from_within_within_capacity(start as usize..end as usize)
496                .expect("can't happen since we checked newlen above");
497        }
498
499        // And additionally / in any case extend with the new space. Use `u32::MAX`
500        // as a weak marker for invalid id
501        v.resize(newlen, u32::MAX);
502        Ok(id as u32)
503    }
504
505    // // only for ids
506    // fn memmove<T>(&self, from: AId<&'a T>, to: AId<&'a T>, len: u32) {
507    // }
508
    /// Create a text node from a `'static` string, via
    /// `KString::from_static`.
    pub fn staticstr(&self, s: &'static str) -> Result<AId<Node>> {
        self.new_string(KString::from_static(s))
    }

    /// Create a text node from a borrowed string, via
    /// `KString::from_ref`.
    pub fn str(&self, s: &str) -> Result<AId<Node>> {
        self.new_string(KString::from_ref(s))
    }
516
    /// Create a text node (i.e. put the given string into the
    /// allocator, return the id to it). Accepts any type that
    /// `KString` can be built from via `MyFrom`.
    // (XX hmm, has issue with not offering KString &'static str
    // optimization, right? This is only a small issue, though. Yes,
    // use `str` method for that. AH, staticstr, rename it.)
    pub fn text<T>(&self, s: T) -> Result<AId<Node>>
    where
        KString: MyFrom<T>,
    {
        self.new_string(KString::myfrom(s))
    }

    /// Like `text` but returns an `ASlice` containing the single text
    /// node, for contexts that expect a slice (i.e. any number of
    /// nodes).
    pub fn text_slice<T>(&self, s: T) -> Result<ASlice<Node>>
    where
        KString: MyFrom<T>,
    {
        // Allocate the node first, then wrap its id in a slice.
        self.new_string(KString::myfrom(s))?.to_aslice(self)
    }
538
    /// Create a text node from an owned `String`.
    // XX remove now that there's text()?
    pub fn string(&self, s: String) -> Result<AId<Node>> {
        self.new_string(KString::from(s))
    }
543
544    // crazy with so many variants?, use a conversion trait?
545    pub fn opt_string(&self, s: Option<String>) -> Result<AId<Node>> {
546        match s {
547            Some(s) => self.new_string(KString::from(s)),
548            None => self.empty_node(),
549        }
550    }
551
    /// Create a text node from a `KString`, which is stored as is.
    // XX remove now that there's text()?
    pub fn kstring(&self, s: KString) -> Result<AId<Node>> {
        self.new_string(s)
    }
556
557    // /// Create a transparent pseudo element with the given body; that
558    // /// body is flattened into the element's body where it is placed.
559    // pub fn flat(
560    //     &self,
561    //     body: impl ToASlice<Node>
562    // ) -> Result<AId<Node>>
563    // {
564    //     body.to_aslice(self)
565    // }
566
    /// Create an element from normal slices or arrays, for nice to use
    /// syntax. `attr` and `body` are converted via `ToASlice` (in that
    /// order) and then passed to `new_element`, so the same
    /// verification and errors apply.
    pub fn element(
        &self,
        meta: &'static ElementMeta,
        attr: impl ToASlice<(KString, KString)>,
        body: impl ToASlice<Node>,
    ) -> Result<AId<Node>> {
        self.new_element(meta, attr.to_aslice(self)?, body.to_aslice(self)?)
    }
577
    /// A text node with just a non-breaking space. Every call
    /// allocates a new node.
    pub fn nbsp(&self) -> Result<AId<Node>> {
        // Cache and re-issue the same node?
        self.str("\u{00A0}")
    }
583
    /// A slice of length 0, tagged with this allocator's current
    /// region id. Does not allocate.
    pub fn empty_slice<T>(&self) -> ASlice<T> {
        ASlice {
            t: PhantomData,
            regionid: self.regionid,
            len: 0,
            start: 0,
        }
    }
592}
593
/// Identifies one generation of one `HtmlAllocator`. Stored in every
/// `AId` and `ASlice` so that use with the wrong allocator, or after
/// the allocator was cleared, can be detected at runtime.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct RegionId {
    allocator_id: u16, // constant
    generation: u16,   // mutated
}
599
/// A typed id referring to a value of type `T` stored in an
/// `HtmlAllocator`.
#[derive(Debug)]
pub struct AId<T> {
    // Carries the target type only; no `T` is stored.
    t: PhantomData<fn() -> T>,
    // The region (allocator and generation) the id was issued for.
    regionid: RegionId,
    // The bare index into the storage for `T`.
    id: u32,
}
606
impl<T: AllocatorType> AId<T> {
    /// Build an id from a region id and a bare index. Private: ids are
    /// only issued by the allocator code in this module.
    fn new(regionid: RegionId, id: u32) -> AId<T> {
        AId {
            t: PhantomData,
            regionid,
            id,
        }
    }
}
616
617// derive is broken when using PhantomData, so do it manually:
618impl<T> Clone for AId<T> {
619    fn clone(&self) -> Self {
620        Self {
621            t: PhantomData,
622            regionid: self.regionid,
623            id: self.id,
624        }
625    }
626}
627impl<T> Copy for AId<T> {}
628
// AVec lives *outside* an allocator
/// A vector that allocates its storage from a `HtmlAllocator`. When
/// finished, convert to `ASlice` via `as_slice()`.
pub struct AVec<'a, T: AllocatorType> {
    t: PhantomData<T>,
    // The allocator whose ids table holds the elements.
    allocator: &'a HtmlAllocator,
    // Number of ids stored so far.
    len: u32,
    // Number of slots reserved in the ids table, starting at `start`.
    cap: u32,
    start: u32, // bare Id for ids
}
639
640impl<'a, T: AllocatorType> AVec<'a, T> {
641    // But actually keep private, only instantiate via HtmlAllocator::new_vec ?
642    pub fn new(allocator: &'a HtmlAllocator) -> AVec<'a, T> {
643        AVec {
644            t: PhantomData,
645            allocator,
646            len: 0,
647            cap: 0,
648            start: 0,
649        }
650    }
651
652    pub fn new_with_capacity(allocator: &'a HtmlAllocator, capacity: u32) -> Result<AVec<'a, T>> {
653        let start = allocator.alloc(capacity, None)?;
654        Ok(AVec {
655            t: PhantomData,
656            allocator,
657            len: 0,
658            cap: capacity,
659            start,
660        })
661    }
662
663    #[inline(always)]
664    pub fn allocator(&self) -> &'a HtmlAllocator {
665        self.allocator
666    }
667
668    pub fn len(&self) -> u32 {
669        self.len
670    }
671
672    pub fn push(&mut self, itemid: AId<T>) -> Result<()> {
673        if self.len == self.cap {
674            // let oldalloclen = self.allocator.ids.borrow().len();//debug
675            let newcap = max(self.cap * 2, 8);
676            // We always need space for AIds, not T::allockind()
677            let newstart = self
678                .allocator
679                .alloc(newcap, Some((self.start, self.start + self.len)))?;
680            // debug
681            // assert!((newstart.0 > self.start.0) ||
682            //         // the first allocation is at 0, and 0 is also in start then.
683            //         (newstart.0 == 0));
684            // let newalloclen = self.allocator.ids.borrow().len();
685            // assert_eq!(newalloclen - oldalloclen, newcap as usize);
686            // if self.len > 0 {
687            //     assert_eq!(self.allocator.get_id::<Node<'a>>(AId::new(self.start.0)).unwrap().0,
688            //                self.allocator.get_id::<Node<'a>>(AId::new(newstart.0)).unwrap().0);
689            // }
690            // /debug
691            self.start = newstart;
692            self.cap = newcap;
693        }
694        self.allocator
695            .set_id(self.len + (self.start as usize) as u32, itemid);
696        self.len += 1;
697        Ok(())
698    }
699
700    pub fn append<S: Into<ASlice<T>>>(&mut self, elements: S) -> Result<()> {
701        let aslice: ASlice<T> = elements.into();
702        for aid in aslice.iter_aid(self.allocator) {
703            self.push(aid)?;
704        }
705        Ok(())
706    }
707
708    pub fn as_slice(&self) -> ASlice<T> {
709        ASlice {
710            t: PhantomData,
711            regionid: self.allocator.regionid,
712            len: self.len,
713            start: self.start,
714        }
715    }
716
717    pub fn reverse(&mut self) {
718        let ids = &mut *self.allocator.ids.borrow_mut();
719        for i in 0..self.len / 2 {
720            ids.swap(
721                (self.start + i) as usize,
722                (self.start + self.len - 1 - i) as usize,
723            );
724        }
725    }
726
727    pub fn extend_from_slice(
728        &mut self,
729        slice: &ASlice<T>,
730        allocator: &HtmlAllocator,
731    ) -> Result<()> {
732        for aid in slice.iter_aid(allocator) {
733            self.push(aid)?;
734        }
735        Ok(())
736    }
737}
738
// about storage *inside* an allocator, thus no allocator field. XX
// could this be improved?
/// A slice of stored `AId<T>`s inside a `HtmlAllocator`.
#[derive(Debug)]
pub struct ASlice<T> {
    // Carries the target type only; no `T` is stored.
    t: PhantomData<fn() -> T>,
    // The region (allocator and generation) the slice belongs to.
    regionid: RegionId,
    // Number of ids in the slice.
    pub(crate) len: u32,
    pub(crate) start: u32, // id bare to retrieve an AId
}
749
750// again, [derive(Clone)] can't handle it for Clone of T, so do it ourselves:
751impl<T> Clone for ASlice<T> {
752    fn clone(&self) -> Self {
753        Self {
754            t: self.t,
755            regionid: self.regionid,
756            len: self.len,
757            start: self.start,
758        }
759    }
760}
761impl<T> Copy for ASlice<T> {}
762
763pub struct ASliceNodeIterator<'a, T> {
764    allocator: &'a HtmlAllocator,
765    t: PhantomData<T>,
766    id: u32,
767    id_end: u32,
768}
769impl<'a, T> Iterator for ASliceNodeIterator<'a, T> {
770    type Item = &'a Node;
771    fn next(&mut self) -> Option<&'a Node> {
772        if self.id < self.id_end {
773            let r = self
774                .allocator
775                .get_id(self.id)
776                .expect("slice should always point to allocated storage");
777            let v = self
778                .allocator
779                .get_node(r)
780                .expect("stored ids should always resolve");
781            self.id += 1;
782            Some(v)
783        } else {
784            None
785        }
786    }
787}
788
789// Horrible COPY-PASTE
790pub struct ASliceAttIterator<'a, T> {
791    allocator: &'a HtmlAllocator,
792    t: PhantomData<T>,
793    id: u32,
794    id_end: u32,
795}
796impl<'a, T> Iterator for ASliceAttIterator<'a, T> {
797    type Item = &'a (KString, KString);
798    fn next(&mut self) -> Option<&'a (KString, KString)> {
799        if self.id < self.id_end {
800            let r = self
801                .allocator
802                .get_id(self.id)
803                .expect("slice should always point to allocated storage");
804            let v = self
805                .allocator
806                .get_att(r)
807                .expect("stored ids should always resolve");
808            self.id += 1;
809            Some(v)
810        } else {
811            None
812        }
813    }
814}
815// /horrible
816
817pub struct ASliceAIdIterator<'a, T> {
818    allocator: &'a HtmlAllocator,
819    t: PhantomData<T>,
820    id: u32,
821    id_end: u32,
822}
823impl<'a, T: AllocatorType> Iterator for ASliceAIdIterator<'a, T> {
824    type Item = AId<T>;
825    fn next(&mut self) -> Option<Self::Item> {
826        if self.id < self.id_end {
827            let r = self
828                .allocator
829                .get_id(self.id)
830                .expect("slice should always point to allocated storage");
831            self.id += 1;
832            Some(r)
833        } else {
834            None
835        }
836    }
837}
838
839impl<'a, T: AllocatorType> IntoIterator for AVec<'a, T> {
840    type Item = AId<T>;
841
842    type IntoIter = ASliceAIdIterator<'a, T>;
843
844    fn into_iter(self) -> Self::IntoIter {
845        ASliceAIdIterator {
846            allocator: self.allocator,
847            t: PhantomData,
848            id: self.start,
849            id_end: self.start + self.len,
850        }
851    }
852}
853
854// stupid copy-paste with 1 character added:
855impl<'a, T: AllocatorType> IntoIterator for &AVec<'a, T> {
856    type Item = AId<T>;
857
858    type IntoIter = ASliceAIdIterator<'a, T>;
859
860    fn into_iter(self) -> Self::IntoIter {
861        ASliceAIdIterator {
862            allocator: self.allocator,
863            t: PhantomData,
864            id: self.start,
865            id_end: self.start + self.len,
866        }
867    }
868}
869
870// Note: can't implement IntoIterator for ASlice, because ASlice does
871// not have a reference to HtmlAllocator and IntoIterator does not
872// allow to take one. See `iter_aid` method on ASlice instead.
873
874impl<'a, T: AllocatorType> ASlice<T> {
    /// The number of ids in this slice.
    pub fn len(&self) -> u32 {
        self.len
    }

    /// Iterate over the nodes that the ids in this slice refer to.
    ///
    /// Panics if the slice does not belong to the current region of
    /// `allocator`.
    pub fn iter_node(&self, allocator: &'a HtmlAllocator) -> ASliceNodeIterator<'a, T> {
        allocator.assert_regionid(self.regionid);
        ASliceNodeIterator {
            allocator,
            t: PhantomData,
            id: self.start,
            id_end: self.start + self.len,
        }
    }
    /// Iterate over the attributes that the ids in this slice refer to.
    ///
    /// Panics if the slice does not belong to the current region of
    /// `allocator`.
    // Horrible COPY-PASTE
    pub fn iter_att(&self, allocator: &'a HtmlAllocator) -> ASliceAttIterator<'a, T> {
        allocator.assert_regionid(self.regionid);
        ASliceAttIterator {
            allocator,
            t: PhantomData,
            id: self.start,
            id_end: self.start + self.len,
        }
    }
898
899    pub fn iter_aid(&self, allocator: &'a HtmlAllocator) -> ASliceAIdIterator<'a, T> {
900        ASliceAIdIterator {
901            allocator,
902            t: PhantomData,
903            id: self.start,
904            id_end: self.start + self.len,
905        }
906    }
907
908    pub fn try_filter_map<F: Fn(AId<T>) -> Result<Option<AId<T>>>>(
909        &self,
910        f: F,
911        capacity: Option<u32>, // None means self.len() will be used
912        allocator: &'a HtmlAllocator,
913    ) -> Result<AVec<'a, T>> {
914        let cap = capacity.unwrap_or_else(|| self.len());
915        let mut v = allocator.new_vec_with_capacity(cap)?;
916        let end = self.start + self.len;
917        for i in self.start..end {
918            let id = allocator
919                .get_id(i)
920                .expect("slice should always point to allocated storage");
921            if let Some(id2) = f(id)? {
922                // XX .with_context ?
923                v.push(id2)?; // should never fail if allocated w/ capacity
924            }
925        }
926        Ok(v)
927    }
928
929    /// Split the slice before the first element for which `f` returns true.
930    pub fn split_when<F: Fn(AId<T>) -> bool>(
931        &self,
932        f: F,
933        allocator: &'a HtmlAllocator,
934    ) -> Option<(ASlice<T>, ASlice<T>)> {
935        let end = self.start + self.len;
936        for place in self.start..end {
937            let id = allocator
938                .get_id(place)
939                .expect("slice should always point to allocated storage");
940            if f(id) {
941                return Some((
942                    ASlice {
943                        t: PhantomData,
944                        regionid: self.regionid,
945                        start: self.start,
946                        len: place - self.start,
947                    },
948                    ASlice {
949                        t: PhantomData,
950                        regionid: self.regionid,
951                        start: place,
952                        len: end - place,
953                    },
954                ));
955            }
956        }
957        None
958    }
959
960    /// Split the slice at position `i`, if that is within the slice.
961    pub fn split_at(&self, i: u32) -> Option<(ASlice<T>, ASlice<T>)> {
962        if i <= self.len {
963            Some((
964                ASlice {
965                    t: PhantomData,
966                    regionid: self.regionid,
967                    start: self.start,
968                    len: i,
969                },
970                ASlice {
971                    t: PhantomData,
972                    regionid: self.regionid,
973                    start: self.start + i,
974                    len: self.len - i,
975                },
976            ))
977        } else {
978            None
979        }
980    }
981
982    /// The first element and the rest, unless the slice is empty.
983    pub fn first_and_rest(&self, allocator: &'a HtmlAllocator) -> Option<(AId<T>, ASlice<T>)> {
984        if self.len >= 1 {
985            let id = allocator
986                .get_id(self.start)
987                .expect("slice should always point to allocated storage");
988            Some((
989                id,
990                ASlice {
991                    t: PhantomData,
992                    regionid: self.regionid,
993                    start: self.start + 1,
994                    len: self.len - 1,
995                },
996            ))
997        } else {
998            None
999        }
1000    }
1001
1002    pub fn get(&self, i: u32, allocator: &'a HtmlAllocator) -> Option<AId<T>> {
1003        if i < self.len {
1004            let id = self.start + i;
1005            allocator.get_id(id)
1006        } else {
1007            None
1008        }
1009    }
1010}
1011
1012fn unwrap_node(node: &Node, meta: &ElementMeta, strict: bool) -> Option<ASlice<Node>> {
1013    match node {
1014        Node::Element(e) => {
1015            if (!strict) || e.attr.len == 0 {
1016                Some(e.body.clone())
1017            } else {
1018                None
1019            }
1020        }
1021        Node::String(_) => None,
1022        Node::Preserialized(p) => {
1023            if p.meta == meta {
1024                warn!("can't unwrap_element of preserialized node");
1025                None
1026            } else {
1027                None
1028            }
1029        }
1030        Node::None => None,
1031    }
1032}
1033
1034impl<'a> ASlice<Node> {
1035    /// If this slice contains only one element of kind `meta` (and
1036    /// that element has no attributes, if `strict` is true), returns
1037    /// that element's body slice.
1038    pub fn unwrap_element_opt(
1039        &self,
1040        meta: &ElementMeta,
1041        strict: bool,
1042        allocator: &'a HtmlAllocator,
1043    ) -> Option<ASlice<Node>> {
1044        if self.len == 1 {
1045            let nodeid = self.get(0, allocator).expect("exists because len == 1");
1046            let node = allocator.get_node(nodeid).expect(
1047                "exists because checked when entered into slice", // was it?
1048            );
1049            unwrap_node(&*node, meta, strict)
1050        } else {
1051            None
1052        }
1053    }
1054
1055    /// If this slice contains only one element of kind `meta` (and
1056    /// that element has no attributes, if `strict` is true), returns
1057    /// that element's body slice, otherwise itself.
1058    pub fn unwrap_element(
1059        &self,
1060        meta: &ElementMeta,
1061        strict: bool,
1062        allocator: &'a HtmlAllocator,
1063    ) -> ASlice<Node> {
1064        self.unwrap_element_opt(meta, strict, allocator)
1065            .unwrap_or_else(|| self.clone())
1066    }
1067
1068    /// Unwrap even if there are multiple elements, unwrap all of
1069    /// those matching `meta`.
1070    pub fn unwrap_elements(
1071        &self,
1072        meta: &ElementMeta,
1073        strict: bool,
1074        allocator: &'a HtmlAllocator,
1075    ) -> Result<ASlice<Node>> {
1076        self.unwrap_element_opt(meta, strict, allocator)
1077            .map_or_else(
1078                || -> Result<ASlice<Node>> {
1079                    let mut v = allocator.new_vec();
1080                    for id in self.iter_aid(allocator) {
1081                        let node = allocator.get_node(id).expect(
1082                            // as long as region id is correct? todo: details again?
1083                            "nodes from slices should always be found?",
1084                        );
1085                        if let Some(subslice) = unwrap_node(&*node, meta, strict) {
1086                            for id in subslice.iter_aid(allocator) {
1087                                v.push(id)?;
1088                            }
1089                        } else {
1090                            v.push(id)?;
1091                        }
1092                    }
1093                    Ok(v.as_slice())
1094                },
1095                Ok,
1096            )
1097    }
1098}
1099
/// A serialized HTML fragment string that can be included in `Node`s.
///
/// Holds the metainformation about the outermost element of the
/// serialized fragment (for dynamic DOM checking) and an `Arc<str>`
/// with the serialized data; `SerHtmlFrag` is thus (reasonably) cheap
/// to clone.
///
/// The fields are `pub` for special needs; be careful not to misuse
/// them. The string needs to contain the top-most tag in serialized
/// form, too, and that tag should match the one specified as `meta`.
#[derive(Debug, Clone)]
pub struct SerHtmlFrag {
    /// Metainformation about the outermost element in `string`.
    pub meta: &'static ElementMeta,
    /// The serialized fragment, including its top-most tag.
    pub string: Arc<str>,
}
1113
1114impl SerHtmlFrag {
1115    #[inline(always)]
1116    pub fn meta(&self) -> &'static ElementMeta {
1117        self.meta
1118    }
1119
1120    #[inline(always)]
1121    pub fn as_str(&self) -> &str {
1122        &*self.string
1123    }
1124
1125    pub fn as_arc_str(&self) -> Arc<str> {
1126        self.string.clone()
1127    }
1128}
1129
/// A node as stored in an allocator.
///
/// Lives *inside* an allocator only, thus has no allocator field.
#[derive(Debug)]
pub enum Node {
    /// An element; see `Element`.
    Element(Element),
    /// A string, held as a `KString`.
    /// NOTE(review): presumably text content of the document — confirm.
    String(KString),
    /// An already serialized HTML fragment; see `SerHtmlFrag`.
    Preserialized(SerHtmlFrag),
    /// Carries no payload.
    /// NOTE(review): presumably stands for an absent/empty node — confirm.
    None,
}
1138
1139impl Node {
1140    pub fn as_element(&self) -> Option<&Element> {
1141        match self {
1142            Node::Element(e) => Some(e),
1143            Node::String(_) => None,
1144            Node::Preserialized(_) => None,
1145            Node::None => None,
1146        }
1147    }
1148    pub fn try_element(&self) -> Result<&Element> {
1149        match self {
1150            Node::Element(e) => Ok(e),
1151            Node::String(_) => bail!("not a Node::String, but Node::Preserialized"),
1152            Node::Preserialized(_) => bail!("not an Node::Element, but Node::Preserialized"),
1153            Node::None => bail!("not an Node::Element, but Node::None"),
1154        }
1155    }
1156}
1157
// Lives *inside* an allocator only (via `Node`), thus has no allocator
// field.
/// An HTML element: its metainformation plus slices referencing its
/// attributes and its body.
///
/// Invalid `Element`s can definitely be built (e.g. with non-allowed
/// child elements), but the fields are still public, since an
/// `Element` will be plucked apart and verified in `allocate_element`
/// before being stored, and there is no mutable access to the store.
#[derive(Debug, Clone)]
pub struct Element {
    /// Metainformation describing what kind of element this is.
    pub meta: &'static ElementMeta,
    /// The attributes, as a slice of `KString` pairs.
    /// NOTE(review): presumably (name, value) — confirm.
    pub attr: ASlice<(KString, KString)>,
    /// The nodes making up the body of the element.
    pub body: ASlice<Node>,
}
1169
1170impl Element {
1171    pub fn meta(&self) -> &'static ElementMeta {
1172        self.meta
1173    }
1174    pub fn attr(&self) -> &ASlice<(KString, KString)> {
1175        &self.attr
1176    }
1177    pub fn body(&self) -> &ASlice<Node> {
1178        &self.body
1179    }
1180
1181    pub fn try_filter_map_body<'a, T: AllocatorType>(
1182        &self,
1183        f: impl Fn(AId<Node>) -> Result<Option<AId<Node>>>,
1184        allocator: &'a HtmlAllocator,
1185    ) -> Result<Element> {
1186        let body2 = self.body.try_filter_map(f, None, allocator)?;
1187        Ok(Element {
1188            meta: self.meta,
1189            attr: self.attr.clone(),
1190            body: body2.as_slice(),
1191        })
1192    }
1193}
1194
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use super::*;

    /// `n as usize` with `n: u32` is used everywhere in this module;
    /// make sure every `u32` value actually fits into a `usize`.
    #[test]
    fn t_system_at_least_32bits() {
        let _widened: usize = usize::try_from(u32::MAX).expect("system has at least 32 bits");
    }

    /// Pins the in-memory sizes of the id types.
    #[test]
    fn t_siz() {
        let regionid_bytes = size_of::<RegionId>();
        let node_id_bytes = size_of::<AId<Node>>();
        assert_eq!(regionid_bytes, 4);
        assert_eq!(node_id_bytes, 8);
    }
}