powerio/format/powerworld/
auxiliary.rs

1//! The generic auxiliary file grammar: parse any `.aux` into [`AuxFile`] and
2//! serialize it back.
3//!
4//! This layer knows the file format and nothing about power systems. The
5//! grammar follows the official guide ("Auxiliary File Format for Simulator
6//! 24", PowerWorld Corporation): a file is a sequence of `DATA` and `SCRIPT`
7//! sections; both the legacy header (`DATA Name(Object, [fields], CSV, NO)`)
8//! and the concise header (`Object Name(fields)`) are read; field lists and
9//! value rows may span lines; `//` starts a comment anywhere outside quotes;
10//! `<SUBDATA Type> ... </SUBDATA>` blocks attach to the value row above them
11//! and their interior lines are kept verbatim.
12//!
13//! [`write_aux`] emits a canonical form: legacy headers, space delimited
14//! values, one row per line. Canonical output is idempotent (parsing it and
15//! writing again reproduces it byte for byte) but does not preserve the
16//! source's whitespace or comments; the byte exact same format round trip
17//! comes from the retained source (see [`crate::write_as`]).
18
19use std::fmt::Write as _;
20
21use crate::{Error, Result};
22
23const FMT: &str = "PowerWorld .aux";
24
/// A parsed auxiliary file: the ordered `DATA` and `SCRIPT` sections.
#[derive(Debug, Clone, PartialEq)]
pub struct AuxFile {
    /// Every section, `DATA` and `SCRIPT` interleaved, in the order the parser
    /// encountered them.
    pub sections: Vec<AuxSection>,
}
30
31impl AuxFile {
32    /// The `DATA` sections, in file order.
33    pub fn data(&self) -> impl Iterator<Item = &AuxObject> {
34        self.sections.iter().filter_map(|s| match s {
35            AuxSection::Data(d) => Some(d),
36            AuxSection::Script(_) => None,
37        })
38    }
39
40    /// The `DATA` sections for one object type (a type may appear more than
41    /// once with different field lists; ACTIVSg exports carry two `Branch`
42    /// blocks, lines and transformers).
43    pub fn data_of<'a>(&'a self, object_type: &'a str) -> impl Iterator<Item = &'a AuxObject> {
44        self.data()
45            .filter(move |d| d.object_type.eq_ignore_ascii_case(object_type))
46    }
47}
48
/// One section of an auxiliary file.
#[derive(Debug, Clone, PartialEq)]
pub enum AuxSection {
    /// A `DATA` section: object type, field list, and value rows.
    Data(AuxObject),
    /// A `SCRIPT` section, body kept as text.
    Script(AuxScript),
}
55
/// A `SCRIPT` section, retained verbatim: powerio executes nothing.
#[derive(Debug, Clone, PartialEq)]
pub struct AuxScript {
    /// Text between the `SCRIPT` keyword and the opening brace, trimmed;
    /// `None` when the header carried nothing there.
    pub name: Option<String>,
    /// Body lines between the braces, byte for byte.
    pub lines: Vec<String>,
}
63
/// One `DATA` section: an object type, its declared field list, and the rows.
#[derive(Debug, Clone, PartialEq)]
pub struct AuxObject {
    /// Object type as spelled in the header (`Bus`, `Branch`, ...); lookups
    /// compare it without regard to ASCII case.
    pub object_type: String,
    /// Optional section name (callable from `LoadData` scripts).
    pub data_name: Option<String>,
    /// Declared fields, in order, location suffixes preserved (`BusNum:1`).
    pub fields: Vec<String>,
    /// `CREATE_IF_NOT_FOUND` argument when the header carried one
    /// (`YES`/`NO`/`PROMPT`).
    pub create_if_not_found: Option<String>,
    /// Value rows in file order; the parser only emits rows holding exactly
    /// one value per declared field.
    pub rows: Vec<AuxRow>,
}
77
78impl AuxObject {
79    /// Position of `field` in the declared field list (case insensitive).
80    #[must_use]
81    pub fn field_index(&self, field: &str) -> Option<usize> {
82        self.fields
83            .iter()
84            .position(|f| f.eq_ignore_ascii_case(field))
85    }
86}
87
/// One value row of a `DATA` section, with any `SUBDATA` blocks that follow it.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct AuxRow {
    /// One value per declared field, quotes removed.
    pub values: Vec<String>,
    /// `SUBDATA` blocks that followed this row, in file order.
    pub subdata: Vec<AuxSubData>,
}
95
/// A `<SUBDATA Type> ... </SUBDATA>` block. The interior format is fixed per
/// subobject type (some are free text, some are per line records), so the
/// lines are kept verbatim.
#[derive(Debug, Clone, PartialEq)]
pub struct AuxSubData {
    /// The text after `SUBDATA` in the opening tag, trimmed.
    pub name: String,
    /// Interior lines between the opening and closing tags, untrimmed and in
    /// file order.
    pub lines: Vec<String>,
}
104
105// ---- Parser -----------------------------------------------------------------
106
107/// Parse auxiliary file `text` into an [`AuxFile`].
108///
109/// # Errors
110/// [`Error::FormatRead`] with the line number on malformed input: an
111/// unterminated section, a row with more values than declared fields, a row cut
112/// short at the closing brace, `SUBDATA` with no owning row, or an unknown
113/// file type specifier.
114pub fn parse_aux(text: &str) -> Result<AuxFile> {
115    Parser {
116        lines: text.lines().collect(),
117        pos: 0,
118    }
119    .parse()
120}
121
/// Line oriented cursor over the source text; the section parsers consume
/// lines through it.
struct Parser<'a> {
    /// The source split into lines, borrowed from the input text.
    lines: Vec<&'a str>,
    /// Index of the next unread line. Right after a line is consumed this
    /// equals that line's 1 based number, which is what error messages report.
    pos: usize,
}
126
127impl<'a> Parser<'a> {
128    fn parse(mut self) -> Result<AuxFile> {
129        let mut sections = Vec::new();
130        while let Some(line) = self.peek_content() {
131            if first_word_is(line, "SCRIPT") {
132                sections.push(AuxSection::Script(self.script()?));
133            } else {
134                sections.push(AuxSection::Data(self.data()?));
135            }
136        }
137        Ok(AuxFile { sections })
138    }
139
140    /// The next line with content after comment stripping, without consuming
141    /// it. Skips blank and comment lines.
142    fn peek_content(&mut self) -> Option<&'a str> {
143        while self.pos < self.lines.len() {
144            let stripped = strip_comment(self.lines[self.pos]).trim();
145            if !stripped.is_empty() {
146                return Some(stripped);
147            }
148            self.pos += 1;
149        }
150        None
151    }
152
153    fn err(&self, message: impl Into<String>) -> Error {
154        Error::FormatRead {
155            format: FMT,
156            message: format!(
157                "line {}: {}",
158                self.pos.min(self.lines.len()),
159                message.into()
160            ),
161        }
162    }
163
164    /// Consume a `SCRIPT Name { ... }` section, body verbatim.
165    fn script(&mut self) -> Result<AuxScript> {
166        let header = strip_comment(self.lines[self.pos]).trim().to_string();
167        self.pos += 1;
168        let mut rest = header["SCRIPT".len()..].trim();
169        let brace_in_header = rest.ends_with('{');
170        if brace_in_header {
171            rest = rest[..rest.len() - 1].trim();
172        }
173        let name = (!rest.is_empty()).then(|| rest.to_string());
174        if !brace_in_header {
175            loop {
176                let Some(line) = self.next_line() else {
177                    return Err(self.err("SCRIPT section with no `{`"));
178                };
179                let t = strip_comment(line).trim();
180                if t == "{" {
181                    break;
182                }
183                if !t.is_empty() {
184                    return Err(self.err("expected `{` after SCRIPT header"));
185                }
186            }
187        }
188        let mut lines = Vec::new();
189        loop {
190            let Some(line) = self.next_line() else {
191                return Err(self.err("unterminated SCRIPT section"));
192            };
193            if line.trim() == "}" {
194                return Ok(AuxScript { name, lines });
195            }
196            lines.push(line.to_string());
197        }
198    }
199
200    fn next_line(&mut self) -> Option<&'a str> {
201        let line = self.lines.get(self.pos).copied();
202        if line.is_some() {
203            self.pos += 1;
204        }
205        line
206    }
207
208    /// Consume a `DATA` section, legacy or concise header.
209    fn data(&mut self) -> Result<AuxObject> {
210        let header = self.header_text()?;
211        let close = header
212            .rfind(')')
213            .ok_or_else(|| self.err("header has no `)`"))?;
214        let brace_in_header = match header[close + 1..].trim() {
215            "" => false,
216            "{" => true,
217            other => {
218                return Err(self.err(format!("unexpected text after section header: {other:?}")));
219            }
220        };
221        let (object_type, data_name, fields, csv, create_if_not_found) =
222            self.split_header(&header[..=close])?;
223        if !brace_in_header {
224            self.expect_open_brace()?;
225        }
226        let rows = self.body(&fields, csv)?;
227        Ok(AuxObject {
228            object_type,
229            data_name,
230            fields,
231            create_if_not_found,
232            rows,
233        })
234    }
235
236    /// Accumulate header lines (comments stripped) until the parentheses
237    /// balance.
238    fn header_text(&mut self) -> Result<String> {
239        let start = self.pos;
240        let mut text = String::new();
241        let mut depth = 0i32;
242        let mut opened = false;
243        while let Some(line) = self.next_line() {
244            let stripped = strip_comment(line).trim();
245            if !text.is_empty() && !stripped.is_empty() {
246                text.push(' ');
247            }
248            text.push_str(stripped);
249            let mut in_quote = false;
250            for c in stripped.chars() {
251                match c {
252                    '"' => in_quote = !in_quote,
253                    '(' if !in_quote => {
254                        depth += 1;
255                        opened = true;
256                    }
257                    ')' if !in_quote => depth -= 1,
258                    _ => {}
259                }
260            }
261            if opened && depth == 0 {
262                return Ok(text);
263            }
264            if self.pos - start > 200 {
265                break;
266            }
267        }
268        Err(self.err("unterminated section header (unbalanced parentheses)"))
269    }
270
271    /// Split a balanced header into its parts. Legacy form:
272    /// `DATA Name(Object, [fields], specifier, create)`. Concise form:
273    /// `Object Name(fields)`.
274    #[allow(clippy::type_complexity)]
275    fn split_header(
276        &self,
277        header: &str,
278    ) -> Result<(String, Option<String>, Vec<String>, bool, Option<String>)> {
279        let open = header
280            .find('(')
281            .ok_or_else(|| self.err("header has no `(`"))?;
282        let close = header
283            .rfind(')')
284            .ok_or_else(|| self.err("header has no `)`"))?;
285        if close <= open {
286            return Err(self.err("header `)` precedes `(`"));
287        }
288        let before = header[..open].trim();
289        let inner = &header[open + 1..close];
290        let legacy = first_word_is(before, "DATA");
291
292        if legacy {
293            let data_name = before["DATA".len()..].trim();
294            let data_name = (!data_name.is_empty()).then(|| data_name.to_string());
295            // Object type, then `[fields]`, then optional specifier and
296            // create_if_not_found.
297            let bracket_open = inner
298                .find('[')
299                .ok_or_else(|| self.err("legacy DATA header has no `[fields]` list"))?;
300            let bracket_close = inner
301                .rfind(']')
302                .ok_or_else(|| self.err("legacy DATA header has no closing `]`"))?;
303            let object_type = inner[..bracket_open].trim().trim_end_matches(',').trim();
304            if object_type.is_empty() {
305                return Err(self.err("legacy DATA header has no object type"));
306            }
307            let fields = split_fields(&inner[bracket_open + 1..bracket_close]);
308            if fields.is_empty() {
309                return Err(self.err("empty field list"));
310            }
311            let mut csv = false;
312            let mut create = None;
313            for arg in inner[bracket_close + 1..].split(',') {
314                let arg = arg.trim();
315                if arg.is_empty() {
316                    continue;
317                }
318                match arg.to_ascii_uppercase().as_str() {
319                    "AUXCSV" | "CSV" | "CSVAUX" => csv = true,
320                    "AUXDEF" | "DEF" => {}
321                    "YES" | "NO" | "PROMPT" => create = Some(arg.to_ascii_uppercase()),
322                    other => {
323                        return Err(self.err(format!("unknown DATA header argument {other:?}")));
324                    }
325                }
326            }
327            Ok((object_type.to_string(), data_name, fields, csv, create))
328        } else {
329            // Concise: `object_type [DataName](fields)`, always space delimited.
330            let mut words = before.split_whitespace();
331            let object_type = words
332                .next()
333                .ok_or_else(|| self.err("concise header has no object type"))?
334                .to_string();
335            let data_name = words.next().map(str::to_string);
336            if words.next().is_some() {
337                return Err(self.err("concise header has more than two words before `(`"));
338            }
339            let fields = split_fields(inner);
340            if fields.is_empty() {
341                return Err(self.err("empty field list"));
342            }
343            Ok((object_type, data_name, fields, false, None))
344        }
345    }
346
347    fn expect_open_brace(&mut self) -> Result<()> {
348        loop {
349            let Some(line) = self.next_line() else {
350                return Err(self.err("DATA section with no `{`"));
351            };
352            let t = strip_comment(line).trim();
353            if t == "{" {
354                return Ok(());
355            }
356            if !t.is_empty() {
357                return Err(self.err(format!("expected `{{` after DATA header, found {t:?}")));
358            }
359        }
360    }
361
362    /// Parse the value rows between the braces. A row may span lines; it is
363    /// complete when it has one value per declared field. `SUBDATA` blocks
364    /// attach to the row above them.
365    fn body(&mut self, fields: &[String], csv: bool) -> Result<Vec<AuxRow>> {
366        let mut rows: Vec<AuxRow> = Vec::new();
367        let mut pending: Vec<String> = Vec::new();
368        loop {
369            let Some(line) = self.next_line() else {
370                return Err(self.err("unterminated DATA section (no closing `}`)"));
371            };
372            let trimmed = line.trim();
373            if trimmed == "}" {
374                if !pending.is_empty() {
375                    return Err(self.err(format!(
376                        "row ended with {} of {} values at the closing brace",
377                        pending.len(),
378                        fields.len()
379                    )));
380                }
381                return Ok(rows);
382            }
383            if let Some(name) = subdata_open(trimmed) {
384                if !pending.is_empty() {
385                    return Err(self.err(format!(
386                        "SUBDATA after an incomplete row ({} of {} values)",
387                        pending.len(),
388                        fields.len()
389                    )));
390                }
391                let subdata = self.subdata(name)?;
392                let Some(row) = rows.last_mut() else {
393                    return Err(self.err("SUBDATA before any value row"));
394                };
395                row.subdata.push(subdata);
396                continue;
397            }
398            let stripped = strip_comment(line).trim();
399            if stripped.is_empty() {
400                continue;
401            }
402            split_values_into(stripped, csv, &mut pending);
403            if pending.len() > fields.len() {
404                return Err(self.err(format!(
405                    "row has {} values for {} declared fields",
406                    pending.len(),
407                    fields.len()
408                )));
409            }
410            if pending.len() == fields.len() {
411                rows.push(AuxRow {
412                    values: std::mem::take(&mut pending),
413                    subdata: Vec::new(),
414                });
415            }
416        }
417    }
418
419    /// Collect a `<SUBDATA name>` block's interior verbatim.
420    fn subdata(&mut self, name: &str) -> Result<AuxSubData> {
421        let mut lines = Vec::new();
422        loop {
423            let Some(line) = self.next_line() else {
424                return Err(self.err(format!("unterminated SUBDATA {name}")));
425            };
426            if line.trim().eq_ignore_ascii_case("</SUBDATA>") {
427                return Ok(AuxSubData {
428                    name: name.to_string(),
429                    lines,
430                });
431            }
432            lines.push(line.to_string());
433        }
434    }
435}
436
/// The `<SUBDATA name>` opener's name, if `line` is one.
///
/// `line` is expected to be trimmed already. The `SUBDATA` keyword is matched
/// without regard to ASCII case, the same way the `</SUBDATA>` closer and the
/// section keywords are. Returns `None` when the line is not an opener or the
/// tag carries no name.
fn subdata_open(line: &str) -> Option<&str> {
    const TAG: &str = "<SUBDATA";
    // `get` rather than indexing: on arbitrary text the tag length may fall
    // inside a multibyte character, which simply means this is not an opener.
    let head = line.get(..TAG.len())?;
    if !head.eq_ignore_ascii_case(TAG) {
        return None;
    }
    let name = line[TAG.len()..].strip_suffix('>')?.trim();
    (!name.is_empty()).then_some(name)
}
444
/// True when `text` begins with `word` (ASCII case ignored) and the keyword
/// ends there: the following character, if any, is neither alphanumeric nor
/// `_`.
fn first_word_is(text: &str, word: &str) -> bool {
    // Checked slicing: on arbitrary input `word.len()` can fall inside a
    // multibyte character, in which case the keyword cannot be present whole.
    let Some(head) = text.get(..word.len()) else {
        return false;
    };
    if !head.eq_ignore_ascii_case(word) {
        return false;
    }
    match text[word.len()..].chars().next() {
        Some(following) => !(following.is_alphanumeric() || following == '_'),
        None => true,
    }
}
457
/// Return `line` up to, but not including, the first `//` that is not inside
/// double quotes; the whole line when there is none.
fn strip_comment(line: &str) -> &str {
    let mut quoted = false;
    let mut bytes = line.bytes().enumerate().peekable();
    while let Some((at, byte)) = bytes.next() {
        if byte == b'"' {
            quoted = !quoted;
        } else if byte == b'/' && !quoted && matches!(bytes.peek(), Some(&(_, b'/'))) {
            // `/` is ASCII, so `at` is always a character boundary.
            return &line[..at];
        }
    }
    line
}
471
/// Break a comma separated field list into trimmed names, discarding entries
/// that are empty after trimming (such as the one left by a trailing comma
/// before a line break).
fn split_fields(text: &str) -> Vec<String> {
    let mut names = Vec::new();
    for entry in text.split(',') {
        let name = entry.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }
    names
}
481
/// Tokenize one line of values and append the tokens to `out`.
///
/// With `csv` the line is cut at commas outside quotes; each piece is trimmed
/// and loses one pair of enclosing quotes if it has them. Otherwise tokens are
/// separated by whitespace outside quotes and every `"` is dropped, so a
/// quoted run keeps its spaces and commas and `""` yields an empty value.
fn split_values_into(line: &str, csv: bool, out: &mut Vec<String>) {
    if csv {
        // Byte offsets of the separating commas, then the end of the line so
        // the final piece (possibly empty) is emitted as well.
        let mut quoted = false;
        let separators = line.bytes().enumerate().filter_map(|(at, byte)| {
            if byte == b',' && !quoted {
                Some(at)
            } else {
                if byte == b'"' {
                    quoted = !quoted;
                }
                None
            }
        });
        let mut piece_start = 0;
        for cut in separators.chain(std::iter::once(line.len())) {
            let piece = line[piece_start..cut].trim();
            let value = match piece.strip_prefix('"') {
                Some(unopened) => unopened.strip_suffix('"').unwrap_or(piece),
                None => piece,
            };
            out.push(value.to_string());
            piece_start = cut + 1;
        }
    } else {
        // `Some` while a token is in progress; an empty quoted token is
        // `Some("")`, which is how `""` survives as a value.
        let mut token: Option<String> = None;
        let mut quoted = false;
        for ch in line.chars() {
            if ch == '"' {
                quoted = !quoted;
                if token.is_none() {
                    token = Some(String::new());
                }
            } else if ch.is_whitespace() && !quoted {
                out.extend(token.take());
            } else {
                token.get_or_insert_with(String::new).push(ch);
            }
        }
        out.extend(token);
    }
}
533
534// ---- Canonical writer -------------------------------------------------------
535
536/// Serialize an [`AuxFile`] in canonical form: legacy headers, space delimited
537/// values, one row per line, two space indentation. Idempotent under
538/// `parse_aux`.
539#[must_use]
540pub fn write_aux(file: &AuxFile) -> String {
541    let mut s = String::new();
542    for section in &file.sections {
543        match section {
544            AuxSection::Data(d) => write_object(&mut s, d),
545            AuxSection::Script(sc) => {
546                match &sc.name {
547                    Some(name) => {
548                        let _ = writeln!(s, "SCRIPT {name}");
549                    }
550                    None => s.push_str("SCRIPT\n"),
551                }
552                s.push_str("{\n");
553                for line in &sc.lines {
554                    s.push_str(line);
555                    s.push('\n');
556                }
557                s.push_str("}\n\n");
558            }
559        }
560    }
561    s
562}
563
564fn write_object(s: &mut String, d: &AuxObject) {
565    // Legacy syntax puts the optional section name between DATA and `(`.
566    match &d.data_name {
567        Some(name) => {
568            let _ = write!(s, "DATA {name}");
569        }
570        None => s.push_str("DATA "),
571    }
572    let _ = write!(s, "({}, [{}]", d.object_type, d.fields.join(", "));
573    if let Some(create) = &d.create_if_not_found {
574        let _ = write!(s, ", AUXDEF, {create}");
575    }
576    s.push_str(")\n{\n");
577    for row in &d.rows {
578        s.push_str("  ");
579        for (i, v) in row.values.iter().enumerate() {
580            if i > 0 {
581                s.push(' ');
582            }
583            push_value(s, v);
584        }
585        s.push('\n');
586        for sub in &row.subdata {
587            let _ = writeln!(s, "  <SUBDATA {}>", sub.name);
588            for line in &sub.lines {
589                s.push_str(line);
590                s.push('\n');
591            }
592            s.push_str("  </SUBDATA>\n");
593        }
594    }
595    s.push_str("}\n\n");
596}
597
/// Write one value, quoting when the bare token would not survive a re-read:
/// empty, embedded whitespace or comma, a `//` that would read as a comment,
/// or a token that would be taken for structure when it is the only value on
/// its line (a lone `}` closes the section, and text starting with `<SUBDATA`
/// can read as a SUBDATA opener). Quoted, those tokens are plain values again.
///
/// An embedded `"` is replaced with a space before quoting: the tokenizer toggles
/// on `"` with no un-escaping, so a literal quote would close the field early and
/// shift every later column.
fn push_value(s: &mut String, v: &str) {
    const SUBDATA_TAG: &str = "<SUBDATA";
    // `get` keeps this safe when the tag length falls inside a multibyte
    // character; the case insensitive match is deliberately the broader one.
    let reads_as_structure = v == "}"
        || v.get(..SUBDATA_TAG.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(SUBDATA_TAG));
    let needs_quotes = v.is_empty()
        || v.contains(char::is_whitespace)
        || v.contains(',')
        || v.contains("//")
        || v.contains('"')
        || reads_as_structure;
    if needs_quotes {
        s.push('"');
        for ch in v.chars() {
            s.push(if ch == '"' { ' ' } else { ch });
        }
        s.push('"');
    } else {
        s.push_str(v);
    }
}
619
// Unit tests for the line level helpers (comment stripping and value
// tokenizing).
#[cfg(test)]
mod tests {
    use super::*;

    // A `//` inside a quoted value is data; the line is cut at the unquoted
    // one and the space in front of it is kept.
    #[test]
    fn strip_comment_keeps_double_slash_inside_quotes() {
        assert_eq!(
            strip_comment(r#"1 "http://example" // trailing"#),
            r#"1 "http://example" "#
        );
    }

    // In CSV mode a comma inside quotes does not split, and `""` is an empty
    // value rather than a missing one.
    #[test]
    fn csv_split_keeps_quoted_commas_and_empty_values() {
        let mut out = Vec::new();
        split_values_into(r#""a,b", "", plain"#, true, &mut out);
        assert_eq!(out, vec!["a,b", "", "plain"]);
    }

    // In space delimited mode a quoted run stays one token, spaces and `//`
    // included.
    #[test]
    fn whitespace_split_keeps_quoted_comment_marker() {
        let mut out = Vec::new();
        split_values_into(r#"one "two // three" four"#, false, &mut out);
        assert_eq!(out, vec!["one", "two // three", "four"]);
    }
}
powerio/format/powerworld/auxiliary.rs

powerio/format/powerworld/
auxiliary.rs