1use crate::{att, ASlice, HtmlAllocator, Node};
2use anyhow::Result;
3
4use chj_util::nowarn as warn;
5
/// Find the first occurrence of `needle` in `s` at or after byte offset `pos`.
///
/// The returned offset is relative to the start of `s` (not to `pos`).
/// Returns `None` when `needle` does not occur in `s[pos..]`, and also when
/// `pos` is past the end of `s` or does not fall on a UTF-8 character
/// boundary (instead of panicking on the slice operation).
fn find_from(s: &str, pos: usize, needle: &str) -> Option<usize> {
    // `get` yields `None` for out-of-range / non-boundary offsets where
    // plain indexing would panic.
    s.get(pos..)?.find(needle).map(|p| p + pos)
}
9
/// Whether `c` is accepted as part of the body of a URL: an ASCII letter or
/// digit, or one of `:`, `/`, `.`, `-`, `%`, `=`.
fn is_url_character(c: char) -> bool {
    matches!(
        c,
        'a'..='z' | 'A'..='Z' | '0'..='9' | ':' | '/' | '.' | '-' | '%' | '='
    )
}
21
/// Whether `c`, when it is the last character of a URL candidate, should be
/// left out of the URL: one of `:`, `.`, `-`, `%`, `=`.
fn is_character_to_exclude_at_end(c: char) -> bool {
    matches!(c, ':' | '.' | '-' | '%' | '=')
}
27
/// Whether `c`, the character directly before a candidate (`None` when the
/// candidate is at the very start of the text), permits the candidate:
/// true for `None` and for anything that is not an ASCII letter or digit.
fn is_character_in_front(c: Option<char>) -> bool {
    c.map_or(true, |ch| !ch.is_ascii_alphanumeric())
}
37
38pub fn autolink(html: &HtmlAllocator, text: &str) -> Result<ASlice<Node>> {
41 warn!("autolink for {text:?}");
42 let mut nodes = html.new_vec();
43 let mut pos_done = 0;
44 let mut pos_remainder = 0;
45 while let Some(pos) = find_from(text, pos_remainder, "http") {
46 warn!("found pos={pos}");
47 let pos_rest = pos + 4; let mut backwardsiter = (&text[0..pos]).chars().rev();
50 if !is_character_in_front(backwardsiter.next()) {
51 warn!("nope 0");
52 pos_remainder = pos_rest;
53 continue;
54 }
55
56 let mut restiter = (&text[pos_rest..]).chars();
57 let c0 = restiter.next();
58 let c1 = restiter.next();
59 let skip_len = match c0 {
60 Some('s') => match c1 {
61 Some(':') => 2,
62 _ => {
63 warn!("nope 1");
64 pos_remainder = pos_rest;
65 continue;
66 }
67 },
68 Some(':') => 1,
69 _ => {
70 warn!("nope 2");
71 pos_remainder = pos_rest;
72 continue;
73 }
74 };
75 let pos_rest = pos_rest + skip_len;
76 if !(&text[pos_rest..]).starts_with("//") {
77 warn!("nope: no // in {:?}", &text[pos_rest..]);
78 pos_remainder = pos_rest;
79 continue;
80 }
81 let pos_rest = pos_rest + 2;
82 let (one_before_end, end); 'find_end: loop {
84 let mut last_i = 0;
86 for (i, c) in (&text[pos_rest..]).char_indices() {
87 if !is_url_character(c) {
88 one_before_end = pos_rest + last_i;
89 end = pos_rest + i;
90 break 'find_end;
91 }
92 last_i = i;
93 }
94 one_before_end = pos_rest + last_i;
95 end = text.len();
96 break;
97 }
98
99 if one_before_end == end {
100 warn!("nope: nothing after //");
101 pos_remainder = pos_rest;
102 continue;
103 }
104
105 let char_before_end = text[one_before_end..]
106 .chars()
107 .next()
108 .expect("char is there because we maintained one_before_end to point there"); let real_end = if is_character_to_exclude_at_end(char_before_end) {
110 one_before_end
111 } else {
112 end
113 };
114 let url = &text[pos..real_end];
115
116 if pos - pos_done > 0 {
117 nodes.push(html.text(&text[pos_done..pos])?)?;
118 }
119 let link = html.a([att("href", url)], [html.text(url)?])?;
120 nodes.push(link)?;
121 warn!("pushed node: {}", html.to_html_string(link, false));
122
123 pos_done = real_end;
124 pos_remainder = real_end;
125 }
126
127 if pos_done < text.len() {
128 nodes.push(html.text(&text[pos_done..])?)?;
129 }
130
131 Ok(nodes.as_slice())
132}
133
#[cfg(test)]
mod tests {
    use crate::Print;

    use super::*;

    /// `find_from` reports offsets relative to the start of the haystack and
    /// only considers matches at or after the given start position.
    #[test]
    fn t_find_from() {
        let cases = [
            ("hello world", 0, "World", None),
            ("hello world", 0, "world", Some(6)),
            ("hello world", 5, "world", Some(6)),
            ("hello world", 6, "world", Some(6)),
            ("hello world", 7, "world", None),
            ("hello world in many worlds", 3, "world", Some(6)),
            ("hello world in many worlds", 7, "world", Some(20)),
        ];
        for (haystack, start, needle, expected) in cases {
            assert_eq!(
                find_from(haystack, start, needle),
                expected,
                "find_from({haystack:?}, {start}, {needle:?})"
            );
        }
    }

    /// Run `autolink` on `s` and serialize the resulting nodes to HTML.
    fn t(s: &str) -> String {
        let allocator = HtmlAllocator::new(1000, std::sync::Arc::new(""));
        autolink(&allocator, s)
            .unwrap()
            .to_html_fragment_string(&allocator)
            .unwrap()
    }

    /// Pairs of input text and the HTML `autolink` is expected to produce.
    #[test]
    fn t_() {
        let cases = [
            // Incomplete URLs stay plain text.
            ("http:// ", "http:// "),
            ("http://", "http://"),
            ("", ""),
            ("foo", "foo"),
            ("http", "http"),
            ("https", "https"),
            ("http:", "http:"),
            ("http:/", "http:/"),
            // Complete URLs are linked.
            ("http://foo", "<a href=\"http://foo\">http://foo</a>"),
            (
                "There's http://foo.com there.",
                "There's <a href=\"http://foo.com\">http://foo.com</a> there.",
            ),
            // A trailing dot is not part of the link.
            (
                "There's http://foo.com. Yes.",
                "There's <a href=\"http://foo.com\">http://foo.com</a>. Yes.",
            ),
            (
                "http://foo.com.",
                "<a href=\"http://foo.com\">http://foo.com</a>.",
            ),
            // "http" glued to a preceding word is not a URL start.
            ("hmhttp://foo.com.", "hmhttp://foo.com."),
        ];
        for (input, expected) in cases {
            assert_eq!(t(input), expected, "autolink input: {input:?}");
        }
    }
}