evobench_tools/silo/
query.rs

1use std::sync::Arc;
2
3use noisy_float::prelude::*;
4use serde::{Deserialize, Serialize};
5
6use crate::utillib::arc::CloneArc;
7
/// Shadows the reference binding `$name` with a copy of the value it points to.
///
/// `copy!(x)` expands to `let x = *x;`, so after the call `x` is the value
/// itself rather than a reference to it.
macro_rules! copy {
    ($name:ident) => {
        // Callers pass bindings named after camelCase struct fields.
        #[allow(non_snake_case)]
        let $name = *$name;
    };
}
14
15// macro_rules! clone_arc {
16//     { $n:ident } => {
17//         #[allow(non_snake_case)]
18//         let $n = crate::utillib::arc::CloneArc::clone_arc($n);
19//     }
20// }
21
22#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
23pub struct DayDate(pub String);
24
25// pub enum Column {
26//     PangoLineage, // pangoLineage
27// }
28
29#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
30#[allow(non_snake_case)]
31pub struct Column(String);
32
33#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
34#[allow(non_snake_case)]
35#[serde(deny_unknown_fields)]
36#[serde(tag = "type")]
37pub enum FilterExpression {
38    Or {
39        children: Vec<Arc<FilterExpression>>,
40    },
41    And {
42        children: Vec<Arc<FilterExpression>>,
43    },
44    Not {
45        child: Arc<FilterExpression>,
46    },
47    #[serde(rename = "N-Of")]
48    NOf {
49        children: Vec<Arc<FilterExpression>>,
50        matchExactly: bool,
51        numberOfMatchers: usize,
52    },
53
54    DateBetween {
55        from: Option<DayDate>,
56        to: Option<DayDate>,
57        column: Column,
58    },
59    Lineage {
60        column: Column,
61        includeSublineages: bool,
62        value: String,
63    },
64    StringEquals {
65        column: Column,
66        value: String,
67    },
68    True {},
69    NucleotideEquals {
70        symbol: String,
71        position: usize,
72    },
73    HasNucleotideMutation {
74        position: usize,
75    },
76    HasAminoAcidMutation {
77        position: usize,
78        sequenceName: String,
79    },
80    FloatBetween {
81        column: Column,
82        from: N64,
83        to: N64,
84    },
85    AminoAcidInsertionContains {
86        position: usize,
87        sequenceName: String, // what kind of?
88        value: String,        // what kind of?
89    },
90    AminoAcidEquals {
91        position: usize,
92        sequenceName: String, //
93        symbol: String,       // can be "*"
94    },
95}
96
97impl FilterExpression {
98    pub fn is_not(&self) -> Option<&Arc<FilterExpression>> {
99        match self {
100            FilterExpression::Not { child } => Some(child),
101            _ => None,
102        }
103    }
104
105    pub fn is_or(&self) -> Option<&Vec<Arc<FilterExpression>>> {
106        match self {
107            FilterExpression::Or { children } => Some(children),
108            _ => None,
109        }
110    }
111
112    pub fn is_and(&self) -> Option<&Vec<Arc<FilterExpression>>> {
113        match self {
114            FilterExpression::And { children } => Some(children),
115            _ => None,
116        }
117    }
118
119    pub fn optimize(self: &Arc<Self>) -> Arc<FilterExpression> {
120        match &**self {
121            FilterExpression::Or { children } => {
122                if children.len() == 1 {
123                    children[0].clone_arc().optimize()
124                } else {
125                    let mut new_children = Vec::new();
126                    for child in children {
127                        let child = child.optimize();
128                        if let Some(subchildren) = child.is_or() {
129                            for child in subchildren {
130                                // already optimized
131                                new_children.push(child.clone_arc());
132                            }
133                        } else {
134                            new_children.push(child);
135                        }
136                    }
137                    FilterExpression::Or {
138                        children: new_children,
139                    }
140                    .into()
141                }
142            }
143            FilterExpression::And { children } => {
144                if children.len() == 1 {
145                    children[0].clone_arc().optimize()
146                } else {
147                    let mut new_children = Vec::new();
148                    for child in children {
149                        let child = child.optimize();
150                        if let Some(subchildren) = child.is_and() {
151                            for child in subchildren {
152                                // already optimized
153                                new_children.push(child.clone_arc());
154                            }
155                        } else {
156                            new_children.push(child);
157                        }
158                    }
159                    FilterExpression::And {
160                        children: new_children,
161                    }
162                    .into()
163                }
164            }
165            FilterExpression::Not { child } => {
166                let child = child.optimize();
167                if let Some(child_child) = child.is_not() {
168                    child_child.clone_arc()
169                } else {
170                    FilterExpression::Not { child }.into()
171                }
172            }
173            FilterExpression::NOf {
174                children,
175                matchExactly,
176                numberOfMatchers,
177            } => {
178                let children = children.iter().map(|e| e.optimize()).collect();
179                copy!(matchExactly);
180                copy!(numberOfMatchers);
181                FilterExpression::NOf {
182                    children,
183                    matchExactly,
184                    numberOfMatchers,
185                }
186                .into()
187            }
188
189            // Non-recursive cases, not optimizable
190            FilterExpression::DateBetween {
191                from: _,
192                to: _,
193                column: _,
194            } => self.clone_arc(),
195            FilterExpression::Lineage {
196                column: _,
197                includeSublineages: _,
198                value: _,
199            } => self.clone_arc(),
200            FilterExpression::StringEquals {
201                column: _,
202                value: _,
203            } => self.clone_arc(),
204            FilterExpression::True {} => self.clone_arc(),
205            FilterExpression::NucleotideEquals {
206                symbol: _,
207                position: _,
208            } => self.clone_arc(),
209            FilterExpression::HasNucleotideMutation { position: _ } => self.clone_arc(),
210            FilterExpression::HasAminoAcidMutation {
211                position: _,
212                sequenceName: _,
213            } => self.clone_arc(),
214            FilterExpression::FloatBetween {
215                column: _,
216                from: _,
217                to: _,
218            } => self.clone_arc(),
219            FilterExpression::AminoAcidInsertionContains {
220                position: _,
221                sequenceName: _,
222                value: _,
223            } => self.clone_arc(),
224            FilterExpression::AminoAcidEquals {
225                position: _,
226                sequenceName: _,
227                symbol: _,
228            } => self.clone_arc(),
229        }
230    }
231}
232
233#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
234#[allow(non_snake_case)]
235#[serde(deny_unknown_fields)]
236pub enum Order {
237    #[serde(rename = "ascending")]
238    Ascending,
239    #[serde(rename = "descending")]
240    Descending,
241}
242
243#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
244#[allow(non_snake_case)]
245#[serde(deny_unknown_fields)]
246pub struct FieldOrder {
247    field: String,
248    order: Order,
249}
250
251#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
252#[allow(non_snake_case)]
253#[serde(deny_unknown_fields)]
254#[serde(tag = "type")]
255pub enum Action {
256    AminoAcidMutations {
257        minProportion: N64,
258        randomize: bool,
259        orderByFields: Option<Vec<FieldOrder>>,
260        limit: Option<usize>,
261    },
262    Details {
263        fields: Vec<String>,
264        randomize: bool,
265    },
266    Mutations {
267        minProportion: N64,
268        randomize: bool,
269        limit: Option<usize>,
270    },
271    Aggregated {
272        groupByFields: Option<Vec<String>>, // non-empty ones?
273        randomize: bool,
274        orderByFields: Option<Vec<FieldOrder>>,
275        limit: Option<usize>,
276    },
277    AminoAcidInsertions {
278        randomize: bool,
279    },
280    FastaAligned {
281        randomize: bool,
282        sequenceName: String, // "main" und so
283    },
284    Insertions {
285        randomize: bool,
286    },
287}
288
289#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
290#[serde(deny_unknown_fields)]
291#[allow(non_snake_case)]
292pub struct Query {
293    pub action: Arc<Action>,
294    pub filterExpression: Arc<FilterExpression>,
295}
296
297impl Query {
298    pub fn optimize(&self) -> Query {
299        let Query {
300            action,
301            filterExpression,
302        } = self;
303        let action = action.clone_arc();
304        #[allow(non_snake_case)]
305        let filterExpression = filterExpression.optimize();
306        Query {
307            action,
308            filterExpression,
309        }
310    }
311}