Skip to main content

powerio_dist/dss/
lex.rs

1//! Tokenizer matching OpenDSS's TParser (Parser/ParserDel.cpp).
2//!
3//! A command line is a sequence of parameters, positional or `name=value`.
4//! Delimiters are `,` and `=` plus space and tab; a token opening with one of
5//! `( " ' [ {` runs to the matching closer and keeps delimiters inside;
6//! `!` and `//` start a comment that eats the rest of the line. A token
7//! beginning with `@` is replaced by the named parser variable, keeping any
8//! `.node` suffix. Quoted tokens parse as RPN when read as numbers; vector
9//! values re-tokenize their content with `|` terminating a matrix row.
10
11use std::collections::BTreeMap;
12
13use super::rpn::{self, RpnCalc};
14
/// Parser variables defined with `var @x=...`.
///
/// Keys include the leading `@`. The scanner lowercases a token's variable
/// name before looking it up, so only lowercase keys can match.
pub type VarMap = BTreeMap<String, String>;
18
/// Bytes that open a quoted token. Each entry pairs, by index, with the
/// closer at the same position in `END_QUOTE`.
const BEGIN_QUOTE: &[u8] = &[b'(', b'"', b'\'', b'[', b'{'];
/// Closing bytes matching `BEGIN_QUOTE`, position for position.
const END_QUOTE: &[u8] = &[b')', b'"', b'\'', b']', b'}'];
21
/// The kind of delimiter that terminated the most recent token.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Delim {
    /// Spaces or tabs; also the value the scanner starts each token with.
    Whitespace,
    /// A delimiter byte; the payload is the byte itself.
    Char(u8),
    /// A comment marker; the rest of the line is discarded.
    Comment,
}
29
30/// One parameter from a command line.
31#[derive(Clone, Debug, PartialEq)]
32pub struct Param {
33    /// Property name to the left of `=`; `None` for a positional value.
34    pub name: Option<String>,
35    pub value: Value,
36}
37
/// A raw value token as scanned from a command line.
///
/// `quoted` records that the text came out of a quote pair, which switches
/// numeric interpretation from a plain number to an RPN expression.
///
/// Both fields have total equality and are hashable, so the type also
/// implements `Eq` and `Hash` and can be used in sets and as a map key.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct Value {
    /// Token text, without the surrounding quote characters.
    pub text: String,
    /// Whether the token came from a quote pair.
    pub quoted: bool,
}
45
/// A bus reference such as `bus1=name.1.2.0`: the bus name plus its node
/// numbers in the order written.
///
/// Implements `Eq` and `Hash` in addition to `PartialEq`, so bus references
/// can be deduplicated or used as map keys.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct BusSpec {
    /// Bus name, i.e. the text before the first `.`.
    pub name: String,
    /// Node numbers as written; `0` is ground. Unparseable nodes become -1,
    /// matching the reference parser's error marker.
    pub nodes: Vec<i32>,
}
54
55#[derive(Debug, thiserror::Error, PartialEq)]
56pub enum ValueError {
57    #[error("`{0}` is not a number")]
58    NotANumber(String),
59    #[error("bad RPN token `{token}` in `{expr}`")]
60    BadRpn { expr: String, token: String },
61}
62
63pub struct Scanner<'a> {
64    buf: &'a [u8],
65    pos: usize,
66    last_delim: Delim,
67    /// Extra delimiter, the matrix row terminator `|` during vector parsing.
68    row_term: bool,
69    vars: Option<&'a VarMap>,
70}
71
72impl<'a> Scanner<'a> {
73    pub fn new(line: &'a str, vars: Option<&'a VarMap>) -> Self {
74        let mut s = Scanner {
75            buf: line.as_bytes(),
76            pos: 0,
77            last_delim: Delim::Whitespace,
78            row_term: false,
79            vars,
80        };
81        s.skip_whitespace();
82        s
83    }
84
85    fn skip_whitespace(&mut self) {
86        while self.pos < self.buf.len() && matches!(self.buf[self.pos], b' ' | b'\t') {
87            self.pos += 1;
88        }
89    }
90
91    fn is_delim_char(&self, b: u8) -> bool {
92        b == b',' || b == b'=' || (self.row_term && b == b'|')
93    }
94
95    fn at_comment(&self) -> bool {
96        match self.buf.get(self.pos) {
97            Some(b'!') => true,
98            Some(b'/') => self.buf.get(self.pos + 1) == Some(&b'/'),
99            _ => false,
100        }
101    }
102
103    /// TParser::GetToken. Returns `None` at end of line; an empty token can
104    /// occur mid stream (e.g. between consecutive commas), as in the
105    /// reference.
106    fn get_token(&mut self) -> Option<(String, bool)> {
107        if self.pos >= self.buf.len() {
108            return None;
109        }
110        self.last_delim = Delim::Whitespace;
111        let mut quoted = false;
112        let text;
113
114        let open = self.buf[self.pos];
115        if let Some(qi) = BEGIN_QUOTE.iter().position(|&q| q == open) {
116            let close = END_QUOTE[qi];
117            self.pos += 1;
118            let start = self.pos;
119            while self.pos < self.buf.len() && self.buf[self.pos] != close {
120                self.pos += 1;
121            }
122            text = String::from_utf8_lossy(&self.buf[start..self.pos]).into_owned();
123            if self.pos < self.buf.len() {
124                self.pos += 1; // past the closer
125            }
126            quoted = true;
127        } else {
128            let start = self.pos;
129            while self.pos < self.buf.len() {
130                if self.at_comment() {
131                    self.last_delim = Delim::Comment;
132                    break;
133                }
134                let b = self.buf[self.pos];
135                if self.is_delim_char(b) {
136                    self.last_delim = Delim::Char(b);
137                    break;
138                }
139                if matches!(b, b' ' | b'\t') {
140                    self.last_delim = Delim::Whitespace;
141                    break;
142                }
143                self.pos += 1;
144            }
145            text = String::from_utf8_lossy(&self.buf[start..self.pos]).into_owned();
146        }
147
148        if self.last_delim == Delim::Comment {
149            self.pos = self.buf.len();
150            return Some((text, quoted));
151        }
152
153        // Move past one terminating delimiter, eating whitespace around it,
154        // so `a = b` and `a=b` scan identically.
155        if self.last_delim == Delim::Whitespace {
156            self.skip_whitespace();
157        }
158        if self.pos < self.buf.len() {
159            if self.at_comment() {
160                self.pos = self.buf.len();
161                return Some((text, quoted));
162            }
163            let b = self.buf[self.pos];
164            if self.is_delim_char(b) {
165                self.last_delim = Delim::Char(b);
166                self.pos += 1;
167            }
168        }
169        self.skip_whitespace();
170        Some((text, quoted))
171    }
172
173    /// TParser::CheckforVar: a token starting with `@` is replaced by its
174    /// variable value, keeping a `.node.node` suffix (`^` also cuts the
175    /// name). A value stored as `{...}` unwraps and becomes a quoted token.
176    fn substitute(&self, token: String, quoted: bool) -> (String, bool) {
177        if token.len() < 2 || !token.starts_with('@') {
178            return (token, quoted);
179        }
180        let Some(vars) = self.vars else {
181            return (token, quoted);
182        };
183        let cut = token.find(['.', '^']).unwrap_or(token.len());
184        let (name, suffix) = token.split_at(cut);
185        let key = name.to_ascii_lowercase();
186        let Some(value) = vars.get(&key) else {
187            return (token, quoted);
188        };
189        if let Some(inner) = value.strip_prefix('{').and_then(|v| v.strip_suffix('}')) {
190            (format!("{inner}{suffix}"), true)
191        } else {
192            (format!("{value}{suffix}"), quoted)
193        }
194    }
195
196    /// TParser::GetNextParam: one positional or `name=value` parameter.
197    /// Variable substitution applies to the value, never the name.
198    pub fn next_param(&mut self) -> Option<Param> {
199        let (tok, quoted) = self.get_token()?;
200        let (name, raw) = if self.last_delim == Delim::Char(b'=') {
201            (Some(tok), self.get_token().unwrap_or_default())
202        } else {
203            (None, (tok, quoted))
204        };
205        let (text, quoted) = self.substitute(raw.0, raw.1);
206        Some(Param {
207            name,
208            value: Value { text, quoted },
209        })
210    }
211
212    /// Remaining unscanned text, trimmed; the argument tail for commands that
213    /// take free text.
214    pub fn remainder(&self) -> &str {
215        std::str::from_utf8(&self.buf[self.pos.min(self.buf.len())..])
216            .unwrap_or_default()
217            .trim()
218    }
219}
220
221impl Value {
222    pub fn new(text: impl Into<String>) -> Self {
223        Value {
224            text: text.into(),
225            quoted: false,
226        }
227    }
228
229    /// TParser::MakeDouble_: quoted tokens evaluate as RPN, bare tokens must
230    /// be plain numbers. An empty value is 0, as in the reference.
231    pub fn to_f64(&self, vars: Option<&VarMap>) -> Result<f64, ValueError> {
232        if self.text.is_empty() {
233            return Ok(0.0);
234        }
235        if self.quoted {
236            return self.eval_rpn(vars);
237        }
238        rpn::parse_number(&self.text).ok_or_else(|| ValueError::NotANumber(self.text.clone()))
239    }
240
241    /// TParser::MakeInteger_: parse as a double and round.
242    pub fn to_i64(&self, vars: Option<&VarMap>) -> Result<i64, ValueError> {
243        self.to_f64(vars).map(|v| v.round() as i64)
244    }
245
246    fn eval_rpn(&self, vars: Option<&VarMap>) -> Result<f64, ValueError> {
247        let mut calc = RpnCalc::new();
248        let mut scan = Scanner::new(&self.text, vars);
249        while let Some((tok, _)) = scan.get_token() {
250            if tok.is_empty() {
251                continue;
252            }
253            let (tok, _) = scan.substitute(tok, false);
254            if !calc.apply(&tok) {
255                return Err(ValueError::BadRpn {
256                    expr: self.text.clone(),
257                    token: tok,
258                });
259            }
260        }
261        Ok(calc.x())
262    }
263
264    /// TParser::ParseAsVector over the whole value: numbers separated by
265    /// whitespace or commas. `|` row terminators split a matrix value into
266    /// rows; a plain vector is one row.
267    pub fn to_rows(&self, vars: Option<&VarMap>) -> Result<Vec<Vec<f64>>, ValueError> {
268        let mut rows = Vec::new();
269        let mut row = Vec::new();
270        let mut scan = Scanner::new(&self.text, vars);
271        scan.row_term = true;
272        while let Some((tok, quoted)) = scan.get_token() {
273            if !tok.is_empty() {
274                let (text, quoted) = scan.substitute(tok, quoted);
275                row.push(Value { text, quoted }.to_f64(vars)?);
276            }
277            if scan.last_delim == Delim::Char(b'|') {
278                rows.push(std::mem::take(&mut row));
279            }
280        }
281        if !row.is_empty() || rows.is_empty() {
282            rows.push(row);
283        }
284        Ok(rows)
285    }
286
287    /// A flat numeric vector (kVs, taps, ZIPV, ...).
288    pub fn to_vector(&self, vars: Option<&VarMap>) -> Result<Vec<f64>, ValueError> {
289        Ok(self.to_rows(vars)?.into_iter().flatten().collect())
290    }
291
292    /// A list of string items (`buses=(b1, b2)`, `conns=(wye delta)`).
293    pub fn to_string_list(&self, vars: Option<&VarMap>) -> Vec<String> {
294        let mut out = Vec::new();
295        let mut scan = Scanner::new(&self.text, vars);
296        while let Some((tok, quoted)) = scan.get_token() {
297            if !tok.is_empty() {
298                out.push(scan.substitute(tok, quoted).0);
299            }
300        }
301        out
302    }
303
304    /// TParser::ParseAsBusName: `name.1.2.0` into name and node list.
305    pub fn to_bus_spec(&self) -> BusSpec {
306        let text = self.text.trim();
307        match text.split_once('.') {
308            None => BusSpec {
309                name: text.to_string(),
310                nodes: Vec::new(),
311            },
312            Some((name, rest)) => BusSpec {
313                name: name.trim().to_string(),
314                nodes: rest
315                    .split('.')
316                    .map(|n| n.trim().parse::<i32>().unwrap_or(-1))
317                    .collect(),
318            },
319        }
320    }
321
322    /// OpenDSS boolean: leading `y`/`t`/`1` is true, anything else false.
323    pub fn to_bool(&self) -> bool {
324        matches!(
325            self.text.bytes().next().map(|b| b.to_ascii_lowercase()),
326            Some(b'y' | b't' | b'1')
327        )
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;

    /// One scanned parameter flattened to `(name, text, quoted)`.
    type Flat = (Option<String>, String, bool);

    /// Scans `line` without parser variables and flattens every parameter.
    fn params(line: &str) -> Vec<Flat> {
        let mut scanner = Scanner::new(line, None);
        std::iter::from_fn(|| scanner.next_param())
            .map(|p| (p.name, p.value.text, p.value.quoted))
            .collect()
    }

    /// Expected tuple for a `name=text` parameter.
    fn named(name: &str, text: &str, quoted: bool) -> Flat {
        (Some(name.to_string()), text.to_string(), quoted)
    }

    /// Expected tuple for a positional parameter.
    fn positional(text: &str, quoted: bool) -> Flat {
        (None, text.to_string(), quoted)
    }

    /// A value as it comes out of a quote pair.
    fn quoted(text: &str) -> Value {
        Value {
            text: text.to_string(),
            quoted: true,
        }
    }

    #[test]
    fn positional_and_named() {
        let got = params("Line.l1 bus1=a bus2=b 0.3");
        assert_eq!(got[0], positional("Line.l1", false));
        assert_eq!(got[1], named("bus1", "a", false));
        assert_eq!(got[2], named("bus2", "b", false));
        assert_eq!(got[3], positional("0.3", false));
    }

    #[test]
    fn spaces_around_equals() {
        assert_eq!(params("a = b"), params("a=b"));
        assert_eq!(params("a =b"), params("a= b"));
    }

    #[test]
    fn comma_separates() {
        let got = params("conns=(wye, delta)");
        assert_eq!(got[0], named("conns", "wye, delta", true));
    }

    #[test]
    fn quote_pairs() {
        let openers = "(\"'[{".chars();
        let closers = ")\"']}".chars();
        for (open, close) in openers.zip(closers) {
            let got = params(&format!("x={open}1 2 3{close}"));
            assert_eq!(got[0], named("x", "1 2 3", true), "{open}");
        }
    }

    #[test]
    fn comments_stop_the_line() {
        assert_eq!(params("a=1 ! trailing").len(), 1);
        assert_eq!(params("a=1 // trailing").len(), 1);
        assert_eq!(params("a=1!glued").len(), 1);
        assert!(params("! whole line").first().unwrap().1.is_empty());
    }

    #[test]
    fn quoted_values_keep_comment_markers() {
        let got = params(r#"a="x // y" b='x ! y' c={x // y}"#);
        assert_eq!(got[0], named("a", "x // y", true));
        assert_eq!(got[1], named("b", "x ! y", true));
        assert_eq!(got[2], named("c", "x // y", true));
    }

    #[test]
    fn slash_alone_is_not_a_comment() {
        let got = params("x=a/b");
        assert_eq!(got[0], named("x", "a/b", false));
    }

    #[test]
    fn rpn_value() {
        assert_eq!(quoted("8 1000 /").to_f64(None), Ok(0.008));
        assert_eq!(Value::new("3.5").to_f64(None), Ok(3.5));
        assert!(Value::new("abc").to_f64(None).is_err());
    }

    #[test]
    fn quoted_single_number_is_rpn() {
        assert_eq!(quoted("42").to_f64(None), Ok(42.0));
    }

    #[test]
    fn matrix_rows() {
        let matrix = quoted("0.088 | 0.031 0.090 | 0.030 0.031 0.088");
        let rows = matrix.to_rows(None).unwrap();
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0], vec![0.088]);
        assert_eq!(rows[2], vec![0.030, 0.031, 0.088]);
    }

    #[test]
    fn vector_with_commas() {
        let flat = quoted("7.2, 0.24").to_vector(None).unwrap();
        assert_eq!(flat, vec![7.2, 0.24]);
    }

    #[test]
    fn rpn_inside_vector() {
        let flat = quoted("1 \"8 1000 /\"").to_vector(None).unwrap();
        assert_eq!(flat, vec![1.0, 0.008]);
    }

    #[test]
    fn bus_dotting() {
        let dotted = Value::new("632.1.2.3.0").to_bus_spec();
        assert_eq!(dotted.name, "632");
        assert_eq!(dotted.nodes, vec![1, 2, 3, 0]);

        let plain = Value::new("sourcebus").to_bus_spec();
        assert_eq!(plain.name, "sourcebus");
        assert!(plain.nodes.is_empty());

        let bad = Value::new("b.1.x").to_bus_spec();
        assert_eq!(bad.nodes, vec![1, -1]);
    }

    #[test]
    fn var_substitution() {
        let vars: VarMap = [("@kv", "12.47"), ("@bus", "632"), ("@expr", "{2 3 *}")]
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();
        let mut scanner = Scanner::new("kv=@kv bus1=@bus.1.2 x=@expr y=@undef", Some(&vars));
        let mut next_value = || scanner.next_param().unwrap().value;

        assert_eq!(next_value().text, "12.47");
        assert_eq!(next_value().text, "632.1.2");

        let expr = next_value();
        assert_eq!(expr.text, "2 3 *");
        assert!(expr.quoted);
        assert_eq!(expr.to_f64(Some(&vars)), Ok(6.0));

        // An unknown variable is left as written.
        assert_eq!(next_value().text, "@undef");
    }

    #[test]
    fn string_list() {
        assert_eq!(quoted("b1, b2").to_string_list(None), vec!["b1", "b2"]);
    }

    #[test]
    fn booleans() {
        for truthy in ["yes", "Y", "true", "1"] {
            assert!(Value::new(truthy).to_bool());
        }
        for falsy in ["no", "false", ""] {
            assert!(!Value::new(falsy).to_bool());
        }
    }
}