ahtml_html/
meta.rs

1//! Cleaned up and indexed data for fast DOM correctness verification.
2
3use crate::{
4    myfrom::MyFrom,
5    types::{AttributeType, MergedElement},
6};
7use anyhow::{anyhow, bail, Context, Result};
8use kstring::KString;
9use std::{
10    collections::{HashMap, HashSet},
11    env,
12    fmt::Display,
13    fs::read_dir,
14    hash::Hash,
15    io::{BufWriter, Write},
16    path::Path,
17    str::FromStr,
18};
19
20// =============================================================================
21// Attributes database
22
23// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes
24// Global attributes are attributes common to all HTML elements; they can be used on all elements, though they may have no effect on some elements.
25// Global attributes may be specified on all HTML elements, even those not specified in the standard.
26
/// Names of the HTML global attributes, each listed exactly once.
///
/// Not listed here: the `data-*` and `aria-*` attribute families (they are
/// name patterns rather than fixed names) and the deprecated `contextmenu`.
const GLOBAL_ATTRIBUTE_NAMES: &[&str] = &[
    "accesskey",
    "autocapitalize",
    "autofocus",
    "class",
    "contenteditable",
    // "contextmenu", //  "Deprecated",
    // "data-*", XX  https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/data-*
    "dir",
    "draggable",
    "enterkeyhint",
    "exportparts", // Experimental
    "hidden",
    "id",
    "inert",
    "inputmode",
    "is",
    "itemid",
    "itemprop",
    "itemref",
    "itemscope",
    "itemtype",
    "lang",
    "nonce",
    "part",
    "popover",
    // The ARIA role attribute; the multiple aria-* states and properties
    // (used for ensuring accessibility) are not listed in this table.
    "role",
    "slot",
    "spellcheck",
    "style",
    "tabindex",
    "title",
    "translate",
    "virtualkeyboardpolicy",
];
65
/// Names of the event handler attributes (`on…`), in alphabetical order,
/// grouped here by the first letter following the `on` prefix.
#[rustfmt::skip]
const EVENT_HANDLER_ATTRIBUTE_NAMES: &[&str] = &[
    // a, b
    "onabort", "onautocomplete", "onautocompleteerror",
    "onblur",
    // c
    "oncancel", "oncanplay", "oncanplaythrough", "onchange", "onclick", "onclose",
    "oncontextmenu", "oncuechange",
    // d
    "ondblclick", "ondrag", "ondragend", "ondragenter", "ondragleave", "ondragover",
    "ondragstart", "ondrop", "ondurationchange",
    // e, f
    "onemptied", "onended", "onerror",
    "onfocus",
    // i, k
    "oninput", "oninvalid",
    "onkeydown", "onkeypress", "onkeyup",
    // l
    "onload", "onloadeddata", "onloadedmetadata", "onloadstart",
    // m
    "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout",
    "onmouseover", "onmouseup", "onmousewheel",
    // p, r
    "onpause", "onplay", "onplaying", "onprogress",
    "onratechange", "onreset", "onresize",
    // s
    "onscroll", "onseeked", "onseeking", "onselect", "onshow", "onsort", "onstalled",
    "onsubmit", "onsuspend",
    // t, v, w
    "ontimeupdate", "ontoggle",
    "onvolumechange",
    "onwaiting",
];
130
131// =============================================================================
132// Element database representation
133
134// The data is provided as .json files, but we want to include the
135// info in the binary statically. We want to use HashSet and HashMap
136// though, and then also KString since it may speed up access due to
137// data locality over &'static str (untested hypothesis). So we have
138// two variants of all datatypes, the Static* and the non-Static ones.
139
140// Data is first read from the json files into the non-Static
141// variants, then printed via `PrintStatic` in Static* syntax to an include
142// file, which is compiled into the binary. Then at start time, those
143// are converted back to the non-Static versions via MyFrom.
144
/// Serialization of a value as Rust source text.
///
/// Implementations in this file emit an expression written in terms of the
/// `Static*` types, suitable for the generated include file.
trait PrintStatic {
    /// Writes `self` as Rust source text to `out`, propagating any I/O error
    /// reported by the writer.
    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()>;
}
148
149impl PrintStatic for KString {
150    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
151        write!(out, "{:?}", self.as_str())
152    }
153}
154
155// Helper wrappers
156
/// Static counterpart of `Vec<T>`: a borrowed slice of elements.
struct StaticVec<'t, T>(&'t [T]);
158
159impl<T: PrintStatic> PrintStatic for Vec<T> {
160    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
161        write!(out, "StaticVec(&[\n")?;
162        let mut vs = self.iter();
163        let mut current = vs.next();
164        while let Some(v) = current {
165            v.print_static(out)?;
166            let next = vs.next();
167            if next.is_some() {
168                write!(out, ",\n")?
169            }
170            current = next;
171        }
172        write!(out, "])\n")
173    }
174}
175
176impl<'t, T1, T2: MyFrom<&'t T1>> MyFrom<&StaticVec<'t, T1>> for Vec<T2> {
177    fn myfrom(v: &StaticVec<'t, T1>) -> Self {
178        v.0.iter().map(|v| T2::myfrom(v)).collect()
179    }
180}
181
/// Static counterpart of `HashMap<K, V>`: a borrowed slice of key/value pairs.
struct StaticMap<'t, K, V>(&'t [(K, V)]);
183
184impl<K: PrintStatic + Ord, V: PrintStatic> PrintStatic for HashMap<K, V> {
185    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
186        write!(out, "StaticMap(&[\n")?;
187        let mut vals: Vec<_> = self.iter().collect();
188        vals.sort_by_key(|(k, _v)| *k);
189        let mut vs = vals.into_iter();
190        let mut current = vs.next();
191        while let Some((k, v)) = current {
192            write!(out, "(")?;
193            k.print_static(out)?;
194            write!(out, ", ")?;
195            v.print_static(out)?;
196            write!(out, ")")?;
197            let next = vs.next();
198            if next.is_some() {
199                write!(out, ",\n")?
200            }
201            current = next;
202        }
203        write!(out, "])\n")
204    }
205}
206
207impl<'t, K1, V1, K2: MyFrom<&'t K1> + Hash + Eq, V2: MyFrom<&'t V1>> MyFrom<&StaticMap<'t, K1, V1>>
208    for HashMap<K2, V2>
209{
210    fn myfrom(v: &StaticMap<'t, K1, V1>) -> Self {
211        v.0.iter()
212            .map(|(k, v)| (K2::myfrom(k), V2::myfrom(v)))
213            .collect()
214    }
215}
216
/// Static counterpart of `HashSet<T>`: a borrowed slice of members.
struct StaticSet<'t, T>(&'t [T]);
218
219impl<'t, T: PrintStatic + Ord> PrintStatic for HashSet<T> {
220    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
221        write!(out, "StaticSet(&[\n")?;
222        let mut vals: Vec<_> = self.iter().collect();
223        vals.sort();
224        let mut vs = vals.into_iter();
225        let mut current = vs.next();
226        while let Some(v) = current {
227            v.print_static(out)?;
228            let next = vs.next();
229            if next.is_some() {
230                write!(out, ",\n")?
231            }
232            current = next;
233        }
234        write!(out, "])\n")
235    }
236}
237
238impl<'t, T1, T2: MyFrom<&'t T1> + Hash + Eq> MyFrom<&StaticSet<'t, T1>> for HashSet<T2> {
239    fn myfrom(v: &StaticSet<'t, T1>) -> Self {
240        v.0.iter().map(|v| T2::myfrom(v)).collect()
241    }
242}
243
244// AttributeType: see types.rs
245
/// Static counterpart of `AttributeType` (see types.rs).
///
/// Only the payload-free variants are declared; the `Identifier` and
/// `Enumerable` variants are left commented out below.
enum StaticAttributeType {
    Bool,
    KString,
    Integer,
    Float,
    // These are unused:
    // Identifier(&'t str),
    // Enumerable(StaticVec<'t, &'t str>),
}
255
256impl PrintStatic for AttributeType {
257    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
258        let mut pr = |s| write!(out, "StaticAttributeType::{s}");
259        match self {
260            AttributeType::Bool => pr("Bool"),
261            AttributeType::KString => pr("KString"),
262            AttributeType::Integer => pr("Integer"),
263            AttributeType::Float => pr("Float"),
264            AttributeType::Identifier(s) => {
265                write!(out, "StaticAttributeType::Identifier({:?})", s.as_str())
266            }
267            AttributeType::Enumerable(vec) => {
268                write!(out, "StaticAttributeType::Enumerable(")?;
269                vec.print_static(out)?;
270                write!(out, ")")
271            }
272        }
273    }
274}
275
276impl<'t> MyFrom<&StaticAttributeType> for AttributeType {
277    fn myfrom(s: &StaticAttributeType) -> Self {
278        match s {
279            StaticAttributeType::Bool => AttributeType::Bool,
280            StaticAttributeType::KString => AttributeType::KString,
281            StaticAttributeType::Integer => AttributeType::Integer,
282            StaticAttributeType::Float => AttributeType::Float,
283            // StaticAttributeType::Identifier(v) => AttributeType::Identifier(KString::myfrom(v)),
284            // StaticAttributeType::Enumerable(v) => AttributeType::Enumerable(Vec::myfrom(v)),
285        }
286    }
287}
288
/// Metadata about one attribute of an element; the attribute's name is the
/// key under which this value is stored in `ElementMeta::attributes`.
#[derive(Debug)]
pub struct Attribute {
    // pub name: KString, -- already known as key in HashMap
    /// Description text taken over from the source data.
    pub description: KString,
    /// The attribute's value type.
    pub ty: AttributeType,
}
295
/// Static counterpart of `Attribute`, using `&str` in place of `KString`.
struct StaticAttribute<'t> {
    // pub name: KString, -- already known as key in HashMap
    /// Description text.
    pub description: &'t str,
    /// The attribute's value type.
    pub ty: StaticAttributeType,
}
301
302impl PrintStatic for Attribute {
303    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
304        write!(
305            out,
306            "StaticAttribute {{\n    description: {:?},\n    ty: ",
307            self.description.as_str()
308        )?;
309        self.ty.print_static(out)?;
310        write!(out, "}}\n")
311    }
312}
313
314impl<'t> MyFrom<&StaticAttribute<'t>> for Attribute {
315    fn myfrom(s: &StaticAttribute<'t>) -> Self {
316        Attribute {
317            description: KString::myfrom(s.description),
318            ty: AttributeType::myfrom(&s.ty),
319        }
320    }
321}
322
/// Metadata about one HTML element type.
///
/// Equality between two `ElementMeta` values is decided by `tag_name` alone
/// (see the `PartialEq` impl).
#[derive(Debug)]
pub struct ElementMeta {
    /// The element's tag name.
    pub tag_name: KString,
    /// Whether the element accepts the global attributes (value taken over
    /// from the source data).
    pub has_global_attributes: bool,
    /// Whether the element has a closing tag (value taken over from the
    /// source data).
    pub has_closing_tag: bool,
    /// The element's own attributes, keyed by attribute name.
    pub attributes: HashMap<KString, Attribute>,
    /// Whether text is permitted as a child; set when the source data lists
    /// `"Text"` among the permitted children.
    pub allows_child_text: bool,
    /// Tag names of the elements permitted as children.
    pub child_elements: HashSet<KString>,
}
332
/// Static counterpart of `ElementMeta`; fields correspond one to one, with
/// `&str`, `StaticMap` and `StaticSet` replacing `KString`, `HashMap` and
/// `HashSet`.
struct StaticElementMeta<'t> {
    pub tag_name: &'t str,
    pub has_global_attributes: bool,
    pub has_closing_tag: bool,
    pub attributes: StaticMap<'t, &'t str, StaticAttribute<'t>>,
    pub allows_child_text: bool,
    pub child_elements: StaticSet<'t, &'t str>,
}
341
342impl PrintStatic for ElementMeta {
343    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
344        write!(out, "StaticElementMeta {{\n")?;
345        write!(out, "    tag_name: {:?}\n", self.tag_name.as_str())?;
346        write!(
347            out,
348            ",\n    has_global_attributes: {:?}\n",
349            self.has_global_attributes
350        )?;
351        write!(out, ",\n    has_closing_tag: {:?}\n", self.has_closing_tag)?;
352        write!(out, ",\n    attributes: ")?;
353        self.attributes.print_static(out)?;
354        write!(
355            out,
356            ",\n    allows_child_text: {:?}\n",
357            self.allows_child_text
358        )?;
359        write!(out, ",\n    child_elements: ")?;
360        self.child_elements.print_static(out)?;
361        write!(out, "}}\n")
362    }
363}
364
365impl<'t> MyFrom<&StaticElementMeta<'t>> for ElementMeta {
366    fn myfrom(s: &StaticElementMeta<'t>) -> Self {
367        ElementMeta {
368            tag_name: KString::myfrom(&s.tag_name),
369            has_global_attributes: s.has_global_attributes,
370            has_closing_tag: s.has_closing_tag,
371            attributes: HashMap::myfrom(&s.attributes),
372            allows_child_text: s.allows_child_text,
373            child_elements: HashSet::myfrom(&s.child_elements),
374        }
375    }
376}
377
378impl PartialEq for ElementMeta {
379    fn eq(&self, other: &Self) -> bool {
380        std::ptr::eq(self, other) || self.tag_name == other.tag_name
381    }
382}
383
384impl Eq for ElementMeta {}
385
/// The complete metadata database: global attribute names plus per-element
/// metadata.
#[derive(Debug)]
pub struct MetaDb {
    /// Names of attributes usable on every element; filled from
    /// `GLOBAL_ATTRIBUTE_NAMES` and `EVENT_HANDLER_ATTRIBUTE_NAMES` when read
    /// from JSON.
    pub global_attribute_names: HashSet<KString>,
    /// Element metadata, keyed by the element's name as derived from its
    /// `.json` file name.
    pub elementmeta: HashMap<KString, ElementMeta>,
}
391
/// Static counterpart of `MetaDb`; this is the type of the generated
/// `STATIC_META_DB` constant.
struct StaticMetaDb<'t> {
    pub global_attribute_names: StaticSet<'t, &'t str>,
    pub elementmeta: StaticMap<'t, &'t str, StaticElementMeta<'t>>,
}
396
397impl PrintStatic for MetaDb {
398    fn print_static<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
399        write!(out, "StaticMetaDb {{\n")?;
400        write!(out, "    global_attribute_names: ")?;
401        self.global_attribute_names.print_static(out)?;
402        write!(out, ",\n    elementmeta: ")?;
403        self.elementmeta.print_static(out)?;
404        write!(out, "}}\n")
405    }
406}
407
408impl<'t> MyFrom<&StaticMetaDb<'t>> for MetaDb {
409    fn myfrom(s: &StaticMetaDb<'t>) -> Self {
410        MetaDb {
411            global_attribute_names: HashSet::myfrom(&s.global_attribute_names),
412            elementmeta: HashMap::myfrom(&s.elementmeta),
413        }
414    }
415}
416
417fn read_types(path: &Path) -> Result<MergedElement> {
418    Ok(serde_json::from_reader(std::io::BufReader::new(
419        std::fs::File::open(path)?,
420    ))?)
421}
422
423fn read_types_db(merged_elements_dir: &Path) -> Result<HashMap<KString, MergedElement>> {
424    (|| -> Result<HashMap<KString, MergedElement>> {
425        let mut m = HashMap::new();
426        for entry in read_dir(merged_elements_dir)
427            .with_context(|| anyhow!("reading directory {merged_elements_dir:?}"))?
428        {
429            let path = entry?.path();
430            (|| -> Result<()> {
431                let filename = path
432                    .file_name()
433                    .ok_or_else(|| anyhow!("path has no file_name: {:?}", path))?;
434                let name = filename.to_string_lossy();
435                let elementname = name
436                    .strip_suffix(".json")
437                    .ok_or_else(|| anyhow!("path is missing .json suffix: {:?}", path))?;
438
439                m.insert(KString::from_ref(elementname), read_types(&path)?);
440                Ok(())
441            })()
442            .with_context(|| anyhow!("path {:?}", path))?;
443        }
444        Ok(m)
445    })()
446    .with_context(|| anyhow!("reading types db from {merged_elements_dir:?}"))
447}
448
449// trait ToFunction<K, V, F: Fn(&K) -> Option<&V>> {
450//     fn to_function(self) -> F;
451// }
452
453// impl<K, V, F: Fn(&K) -> Option<&V>> ToFunction<K, V, F> for HashMap<K, V> {
454//     fn to_function(self) -> F {
455//     }
456// }
457
458pub fn read_meta_db_from_json(merged_elements_dir: &Path) -> Result<MetaDb> {
459    let empty_kstring = KString::from_ref("");
460
461    let ts = read_types_db(merged_elements_dir)?;
462
463    let mut tag_name_by_struct_name: HashMap<KString, KString> = HashMap::new();
464    for (tag_name, elt) in &ts {
465        tag_name_by_struct_name.insert(elt.struct_name.clone(), tag_name.clone());
466    }
467
468    let mut elementmeta = HashMap::new();
469    for (k, v) in ts {
470        // dbg!((&k, &v));
471        let mut attributes = HashMap::new();
472        for att in v.attributes {
473            attributes.insert(
474                att.name,
475                Attribute {
476                    description: att.description,
477                    ty: att.ty,
478                },
479            );
480        }
481
482        let mut child_elements: HashSet<KString> = v
483            .permitted_child_elements
484            .iter()
485            .map(|k| {
486                if let Some(tn) = tag_name_by_struct_name.get(k) {
487                    tn.clone()
488                } else {
489                    if k == "Text" {
490                        empty_kstring.clone()
491                    } else {
492                        panic!("unknown permitted child element name {:?}", k)
493                    }
494                }
495            })
496            .collect();
497        let allows_child_text = child_elements.take(&empty_kstring).is_some();
498
499        elementmeta.insert(
500            k,
501            ElementMeta {
502                tag_name: v.tag_name,
503                has_global_attributes: v.has_global_attributes,
504                has_closing_tag: v.has_closing_tag,
505                attributes,
506                allows_child_text,
507                child_elements,
508            },
509        );
510    }
511
512    let mut global_attribute_names: HashSet<KString> = HashSet::new();
513    for n in GLOBAL_ATTRIBUTE_NAMES {
514        global_attribute_names.insert(KString::from_static(n));
515    }
516    for n in EVENT_HANDLER_ATTRIBUTE_NAMES {
517        global_attribute_names.insert(KString::from_static(n));
518    }
519
520    Ok(MetaDb {
521        global_attribute_names,
522        elementmeta,
523    })
524}
525
526// once again, XX move to lib
527fn opt_get_env<T: FromStr>(varname: &str) -> Result<Option<T>>
528where
529    T::Err: Display,
530{
531    match env::var(varname) {
532        Ok(s) => {
533            // dbg!(&s);
534            Ok(Some(s.parse().map_err(|e| {
535                anyhow!("could not parse {varname:?} env var with contents {s:?}: {e}")
536            })?))
537        }
538        Err(e) => match e {
539            env::VarError::NotPresent => Ok(None),
540            env::VarError::NotUnicode(_) => bail!("could not decode {varname:?} env var: {e}"),
541        },
542    }
543}
544
545fn get_env_bool(varname: &str) -> Result<bool> {
546    Ok(opt_get_env(varname)?.unwrap_or(false))
547}
548
// Textually includes the pre-generated database source; `read_meta_db` below
// relies on it to define the `STATIC_META_DB` constant. Presumably this is the
// file that `read_meta_db` writes when `WRITE_STATIC_META_DB_RS_PATH` is set
// -- verify against the build setup.
include!("../includes/static_meta_db.rs");
550
551pub fn read_meta_db() -> Result<MetaDb> {
552    let debug = get_env_bool("HTML_META_DEBUG")?;
553    if let Some(dir) = opt_get_env::<String>("HTML_READ_META_DB_FROM_JSON_DIR")? {
554        if debug {
555            eprintln!("reading meta db from json")
556        };
557        let metadb = read_meta_db_from_json(dir.as_ref())?;
558        // XX HACK
559        if let Some(path) = opt_get_env::<String>("WRITE_STATIC_META_DB_RS_PATH")? {
560            if debug {
561                eprintln!("rewriting {path:?} from meta db from json..")
562            };
563            let mut out = BufWriter::new(
564                std::fs::File::create(path.as_str())
565                    .with_context(|| anyhow!("creating file {path:?} for writing"))?,
566            );
567            (|| -> Result<()> {
568                let out = &mut out;
569                write!(
570                    out,
571                    "
572// This file was auto-generated by meta.rs from the ahtml_html crate,
573// using the meta database at {path:?}
574
575const STATIC_META_DB: StaticMetaDb = "
576                )?;
577                metadb.print_static(out)?;
578                write!(out, ";\n")?;
579                out.flush()?;
580                Ok(())
581            })()
582            .with_context(|| anyhow!("writing to {path:?}"))?;
583            if debug {
584                eprintln!("rewriting {path:?} from meta db from json..done.")
585            };
586        }
587        Ok(metadb)
588    } else {
589        if debug {
590            eprintln!("reading meta db from static")
591        };
592        Ok(MetaDb::myfrom(&STATIC_META_DB))
593    }
594}