Skip to main content

powerio/format/powerworld/
pwb.rs

1//! Read PowerWorld `.pwb` binary case files (read only).
2//!
3//! The format is undocumented; everything here was established by
4//! differential analysis of `.pwb`/`.aux` sibling exports of the ACTIVSg
5//! synthetic grids and is recorded with its evidence in
6//! `powerio/src/format/powerworld/FORMAT.md`. The reader
7//! decodes the power flow core tables (buses, loads, generators, shunts,
8//! branches) and stops there; the rest of the file (substations, areas,
9//! contingencies, options) is inventoried in the docs and left undecoded.
10//!
11//! Robustness rule: every record is validated as it is parsed (bus
12//! references must exist, floats must be finite and in range, record flags
13//! must be values this reader has seen and verified). A file that does not
14//! match the validated layout fails loudly; nothing is guessed silently.
15//!
16//! Supported header constants: 338, 368, 425, 483, 508, 537, 550, 551, and 554.
17//! These constants gate only the writer era; a recognized constant still has
18//! to pass the table walk. Constants 338/368/425 use the older generator record
19//! (`bus`, ID, f32 block), 483/537/550/551 use the regulated bus record, 508
20//! has been observed with both generator families, and 554 uses the regulated
21//! record without the 2021 era presence byte. The bus, load, shunt, and branch
22//! heads are more general: their flag words are Delphi field presence bitmasks,
23//! so one decoded head model admits the observed 0x06, 0x26, and 0x66 families
24//! as long as the later table walk still validates.
25//!
26//! The emerging structure is useful but bounded: the file is a sequence of
27//! count-word tables separated by writer metadata, each record starts with a
28//! small stable head, optional fields are controlled by bitmasks or short kind
29//! markers, and long tails are skipped only after anchors prove the record kind.
30//! That gives a general path for new vintages without guessing at fields.
31//!
32//! To add a new vintage, start with the smallest stable facts: header words,
33//! bus flag census, table count positions, record anchors, and companion
34//! export parity. Prefer widening a presence bit or table glue window only
35//! after a full record walk still validates every later table. A new layout
36//! belongs behind its own probe until a sibling `.aux`, `.raw`, `.epc`, or
37//! `.m` file proves that it shares an existing record family.
38//!
39//! The table search prices the format's structure (no field dictionary, so
40//! every table is located by validating record walks behind count word
41//! candidates), and the probe layer is built so that search allocates only
42//! for records it accepts: probe rejections carry `&'static str` reasons
43//! instead of formatted strings ([`Probe`]), bus membership is a bitmap
44//! over the id range instead of a hash set ([`BusIdSet`]), and record runs
45//! are cached by first record offset ([`Run`]) so count word candidates
46//! that point at the same records share one walk. Issue #99 records the
47//! measurements.
48//!
49//! Known limits, documented rather than guessed:
50//!
51//! - Status bytes: the 483 era generator record is the one located,
52//!   validated status in the corpus (bit 0 of the byte one past the f32
53//!   block, proven against the 94 open machines in the Texas7k aux). Every
54//!   other device in every available case is in service, so no other out of
55//!   service encoding is validated and those devices read as in service.
56//!   The load record's post ID byte, once treated as a status, is 0x00 in
57//!   the 425 era files and 0x01 in the 2021 era ones with every load Closed
58//!   in both, so it is no status byte; the 425 era generator, the shunt,
59//!   and the branch status bytes are unlocated.
60//! - Transformer phase shift: every available case has zero phase, so the
61//!   field's offset is unknown; transformers read with `shift = 0`.
62//! - The slack designation is not stored in the bus record; buses read as
63//!   PQ/PV (from the generators) and no bus is marked `Ref`.
64//! - The system MVA base is not decoded; per unit values are converted with
65//!   the 100 MVA default.
66//! - The shunt record's nominal MW slot is unlocated: every available case
67//!   stores zero shunt MW, and the slot once assumed to hold it carries 0.99
68//!   in the 2016 export (a regulation target, not a power). Shunts read with
69//!   `g = 0` and only the nominal MVAr is decoded.
70//! - Branch ratings beyond the inline slots (two or three, by flag bit 1)
71//!   are zero in every available case; the trailing rating block is
72//!   validated as zero filled f32s and read as zero ratings.
73//! - Bus voltage limits are not decoded; buses read with the 1.1/0.9
74//!   defaults the aux reader also falls back to when the per rating set
75//!   fields are absent.
76//! - Branch angle limits have no PowerWorld field at all; branches read
77//!   with the +-360 degree placeholder every reader uses for absence.
78
79use std::cell::{Cell, RefCell};
80use std::collections::HashMap;
81use std::collections::hash_map::Entry;
82
83use super::map::{BRANCH_DEVICE_TYPE, LINE_CIRCUIT, derive_bus_kinds};
84use crate::network::{
85    Branch, Bus, BusId, BusType, Extras, Generator, Load, Network, Shunt, SourceFormat,
86};
87use crate::{Error, Result};
88
/// Format label carried by every [`Error::FormatRead`] this reader builds
/// and handed to the network reference check, so errors name their source.
const FMT: &str = "PowerWorld .pwb";

/// The system MVA base used to convert the file's per unit f32 storage into
/// physical units. The base itself is not decoded (see the module docs);
/// every available sibling case uses PowerWorld's 100 MVA default.
const MVA_BASE: f64 = 100.0;

/// How far ahead a bounded scan may look for the next record or table, in
/// bytes. Large enough for every observed record tail, small enough that a
/// derailed parse fails fast instead of wandering.
const RESYNC_WINDOW: usize = 1024;
100
/// Upper bound on table-location probes across the whole chain search.
///
/// A count word is attacker-controlled, so a crafted file can pack the header
/// window with valid-looking table heads that never complete a chain and
/// force the nested bus × load × generator × shunt × branch search to run to
/// exhaustion (a bit-4 tail on the last bus record stretches the load scan to
/// `BLOB_WINDOW`, so the blowup is multiplicative). A probe is one candidate
/// `(count, glue)` attempt or one scanned record head position, charged at
/// every scan site. The largest corpus file (Texas7k, 13.7 MB) spends 16M
/// probes and a 64 KB truncation fixture peaks at 42M; exceeding the cap
/// means the bytes are not a decodable layout, so the search stops and
/// reports a read error instead of spinning.
const SEARCH_PROBE_BUDGET: u64 = 128_000_000;

/// Probe counter shared by every scan of one `parse_pwb` call.
///
/// The count lives in a [`Cell`] so scans can charge it through a shared
/// reference.
struct SearchBudget(Cell<u64>);

impl SearchBudget {
    /// Start a search with nothing spent.
    fn new() -> Self {
        Self(Cell::default())
    }

    /// Charge one probe. Returns `true` while the total spent (this probe
    /// included) is still within [`SEARCH_PROBE_BUDGET`]. The counter
    /// saturates instead of wrapping.
    fn tick(&self) -> bool {
        let Self(spent) = self;
        let charged = spent.get().saturating_add(1);
        spent.set(charged);
        charged <= SEARCH_PROBE_BUDGET
    }

    /// Whether more probes were charged than the budget allows, i.e. some
    /// earlier [`tick`](Self::tick) returned `false`.
    fn exhausted(&self) -> bool {
        SEARCH_PROBE_BUDGET < self.0.get()
    }
}
134
135/// The probe layer's error type. Probe rejections are pure control flow (the
136/// table search discards them wholesale and the loud user visible errors are
137/// built at the parse boundary), so they carry a static description and never
138/// allocate; the texts document why each check exists.
139type Probe<T> = std::result::Result<T, &'static str>;
140
141/// Parse `.pwb` bytes into a [`Network`]. `name_hint` (the file stem) names
142/// the network; the binary carries no case name in the decoded region.
143///
144/// # Errors
145/// [`Error::FormatRead`] when the header is not the known magic, a record
146/// does not match the validated layouts, or a table cannot be located.
147pub fn parse_pwb(bytes: &[u8], name_hint: Option<&str>) -> Result<Network> {
148    let header_constant = expect_header(bytes)?;
149    reject_unsupported_vintage(bytes)?;
150    // The header constant pins the generator record layout wherever the
151    // corpus is unambiguous: every 425 file carries the bus + ID shape and
152    // every 483/537/550/551 file the regulated bus shape, while 508 saves
153    // exist with both (Hawaii40 against the Texas7k v21 resave), so only
154    // they try the two in sequence. Beyond pricing, this keeps the layout
155    // a file cannot carry from ever outbidding the right one in the chain
156    // search; a hypothetical file mixing eras fails loudly instead.
157    let gen_variants = match header_constant {
158        338 | 368 | 425 => GenVariants {
159            plain: true,
160            reg: false,
161            simple_reg: false,
162        },
163        508 => GenVariants {
164            plain: true,
165            reg: true,
166            simple_reg: false,
167        },
168        554 => GenVariants {
169            plain: false,
170            reg: false,
171            simple_reg: true,
172        },
173        _ => GenVariants {
174            plain: false,
175            reg: true,
176            simple_reg: false,
177        },
178    };
179    let branch_count_can_include_trailer = header_constant == 554;
180    let narrow_glue = if header_constant == 425 {
181        DeviceGlue::old_425()
182    } else {
183        DeviceGlue::wide()
184    };
185    let wide_glue = DeviceGlue::wide();
186    // The narrow bus glue window prices the common files (see
187    // bus_table_candidates); the wide retry exists so a small node level
188    // resave (a bus table under 256 records with the v21 writer's 52 byte
189    // glue) is a second slower search instead of a coverage cliff. The
190    // retry only runs on files the narrow search already failed, enumerates
191    // only the glue combinations the narrow pass could not reach, and
192    // shares the bus run cache so nothing is walked twice.
193    let bus_runs = RefCell::new(HashMap::new());
194    let budget = SearchBudget::new();
195    let found = search_table_chain(
196        bytes,
197        name_hint,
198        gen_variants,
199        branch_count_can_include_trailer,
200        &bus_runs,
201        narrow_glue,
202        false,
203        &budget,
204    )
205    .or_else(|| {
206        let retry = |wide_bus_glue, device_glue| {
207            search_table_chain(
208                bytes,
209                name_hint,
210                gen_variants,
211                branch_count_can_include_trailer,
212                &bus_runs,
213                device_glue,
214                wide_bus_glue,
215                &budget,
216            )
217        };
218        (wide_glue != narrow_glue)
219            .then(|| retry(false, wide_glue))
220            .flatten()
221            .or_else(|| retry(true, narrow_glue))
222            .or_else(|| {
223                (wide_glue != narrow_glue)
224                    .then(|| retry(true, wide_glue))
225                    .flatten()
226            })
227    });
228    found.unwrap_or_else(|| {
229        if budget.exhausted() {
230            return Err(Error::FormatRead {
231                format: FMT,
232                message: "table search exceeded its work budget; the bytes are not a \
233                              decodable .pwb layout"
234                    .into(),
235            });
236        }
237        Err(Error::FormatRead {
238            format: FMT,
239            message: "no table chain matches the validated .pwb layouts \
240                          (buses, loads, generators, shunts, branches in sequence)"
241                .into(),
242        })
243    })
244}
245
/// Which generator record layouts the header constant admits (see
/// [`parse_pwb`]): the older bus + ID shape (`plain`), the 2021 era
/// regulated bus shape (`reg`, [`read_gen_reg_record`]), and the 554 shape
/// whose regulated bus record omits the presence byte (`simple_reg`,
/// [`read_gen_reg_simple_record`]). More than one flag may be set when the
/// header constant is ambiguous (508 sets both `plain` and `reg`).
#[derive(Clone, Copy)]
struct GenVariants {
    /// Probe the bus + ID generator record; set for header constants 338,
    /// 368, 425, and 508.
    plain: bool,
    /// Probe the regulated bus generator record; set for 508 and for every
    /// admitted constant not listed under the other two flags.
    reg: bool,
    /// Probe the regulated record without the presence byte; set for 554.
    simple_reg: bool,
}
257
/// Per table glue settings handed to `device_table_candidates` for the load
/// table and for each generator record layout.
///
/// NOTE(review): presumably each value bounds, in bytes, the writer metadata
/// tolerated between a table's count word and its first record — confirm
/// against `device_table_candidates`.
#[derive(Clone, Copy, PartialEq, Eq)]
struct DeviceGlue {
    /// Glue setting for the load table.
    load: usize,
    /// Glue setting for the bus + ID generator table.
    plain_gen: usize,
    /// Glue setting for the regulated generator table.
    reg_gen: usize,
    /// Glue setting for the 554 regulated generator table.
    simple_reg_gen: usize,
}

impl DeviceGlue {
    /// The narrow settings used first for header constant 425: the load and
    /// bus + ID generator values drop to 48, the rest stay at the wide value.
    fn old_425() -> Self {
        Self {
            load: 48,
            plain_gen: 48,
            ..Self::wide()
        }
    }

    /// The wide settings: 128 for every table.
    fn wide() -> Self {
        const WIDE: usize = 128;
        Self {
            load: WIDE,
            plain_gen: WIDE,
            reg_gen: WIDE,
            simple_reg_gen: WIDE,
        }
    }
}
285
/// One full depth first search for the table chain; `None` when no chain
/// matches. `wide_bus_glue` lifts the bus table's count gated glue window
/// (see [`bus_table_candidates`]) for the retry pass.
///
/// Parameters:
/// - `bytes`: the whole file.
/// - `name_hint`: forwarded to [`checked_network`] to name the result.
/// - `gen_variants`: which generator record layouts to probe.
/// - `branch_count_can_include_trailer`: forwarded to `find_branch_table`.
/// - `bus_runs`: bus run cache owned by the caller, so repeated passes over
///   the same bytes can share it.
/// - `device_glue`: glue settings for the load and generator table probes.
/// - `budget`: the probe budget every scan in this search is handed.
///
/// Returns the best scored chain of the first bus table candidate that
/// completes at least one chain. The inner `Result` is the outcome of
/// [`checked_network`], so a located chain can still surface as an error.
#[expect(clippy::too_many_lines, clippy::too_many_arguments)]
fn search_table_chain(
    bytes: &[u8],
    name_hint: Option<&str>,
    gen_variants: GenVariants,
    branch_count_can_include_trailer: bool,
    bus_runs: &RefCell<BusRuns>,
    device_glue: DeviceGlue,
    wide_bus_glue: bool,
    budget: &SearchBudget,
) -> Option<Result<Network>> {
    // A count word can be forged by record interiors and the case
    // description, so table location is a depth first search: a candidate at
    // any stage is kept only if every later table parses behind it. Within
    // one bus table candidate every chain that completes is scored and the
    // largest decoded electrical core is kept (see keep_best_chain); the
    // search returns at the first bus table candidate that produced any
    // chain. Wrong candidates die fast on their bounded windows; a file with
    // no valid chain fails loudly. The run caches make the backtracking
    // affordable: candidates pointing at the same first record share one walk
    // however many count words and search retries reach it.
    for (buses, bus_shunts, bus_end, last_bus_unk) in
        bus_table_candidates(bytes, bus_runs, wide_bus_glue, budget)
    {
        let Some(bus_ids) = BusIdSet::new(&buses) else {
            continue; // duplicate ids: not a real bus table
        };
        // Best (score, network) seen so far for this bus table candidate.
        let mut best = None;
        let bus_names = bus_name_map(&buses);
        // The device and branch runs validate bus references, so their
        // caches are scoped to one bus table candidate.
        let load_runs = RefCell::new(HashMap::new());
        let gen_runs = RefCell::new(HashMap::new());
        let gen_reg_runs = RefCell::new(HashMap::new());
        let gen_reg_simple_runs = RefCell::new(HashMap::new());
        let shunt_runs = RefCell::new(HashMap::new());
        let branch_runs = RefCell::new(HashMap::new());
        // The load table's count word sits past the final bus record's
        // undecoded tail, which a bit 4 list can stretch beyond one window
        // (the 2030 build's lists run 1341 bytes); the seam scan honors it
        // exactly as the intra table stepping does.
        let load_scan_end = resync_end(bytes, bus_end, last_bus_unk & 0x10 != 0);
        for (loads, l_end) in device_table_candidates(
            bytes,
            bus_end..load_scan_end,
            &bus_ids,
            read_load_record,
            &load_runs,
            device_glue.load,
            12,
            budget,
        ) {
            // The generator table reads through the record layouts the
            // header constant admits (see parse_pwb). A file's table uses
            // exactly one; each gets its own run cache and the structural
            // gauntlets keep the wrong one from parsing. The 508 era is
            // ambiguous in the corpus, so the regulated layout goes first:
            // on Texas7k v21, the older probe can accept a false table
            // before the true regulated table if it gets first refusal.
            // The newer layout's table glue runs to 86 bytes in the v21
            // resave; the older table glue reaches 104 bytes in the IEEE
            // 24 bus save.
            //
            // Each `then(..).into_iter().flatten()` link contributes its
            // candidates only when that variant is admitted, and the chain
            // fixes the probe order: reg, then simple_reg, then plain.
            let gen_candidates = gen_variants
                .reg
                .then(|| {
                    device_table_candidates(
                        bytes,
                        l_end..l_end.saturating_add(RESYNC_WINDOW),
                        &bus_ids,
                        read_gen_reg_record,
                        &gen_reg_runs,
                        device_glue.reg_gen,
                        40,
                        budget,
                    )
                })
                .into_iter()
                .flatten()
                .chain(
                    gen_variants
                        .simple_reg
                        .then(|| {
                            device_table_candidates(
                                bytes,
                                l_end..l_end.saturating_add(RESYNC_WINDOW),
                                &bus_ids,
                                read_gen_reg_simple_record,
                                &gen_reg_simple_runs,
                                device_glue.simple_reg_gen,
                                40,
                                budget,
                            )
                        })
                        .into_iter()
                        .flatten(),
                )
                .chain(
                    gen_variants
                        .plain
                        .then(|| {
                            device_table_candidates(
                                bytes,
                                l_end..l_end.saturating_add(RESYNC_WINDOW),
                                &bus_ids,
                                read_gen_record,
                                &gen_runs,
                                device_glue.plain_gen,
                                32,
                                budget,
                            )
                        })
                        .into_iter()
                        .flatten(),
                );
            for (generators, g_end) in gen_candidates {
                // A generator record starting right behind the candidate
                // means the count word cut the real table short.
                if gen_table_continues(bytes, g_end, &bus_ids, gen_variants, budget) {
                    continue;
                }
                // Path 1: bus tail shunts only. The branch table is looked
                // for straight after the generators and this chain is
                // offered first, so it wins score ties against the chains
                // offered below (keep_best_chain keeps the incumbent on a
                // tie).
                if !bus_shunts.is_empty() {
                    if let Some(branches) = find_branch_table(
                        bytes,
                        g_end,
                        &bus_ids,
                        &bus_names,
                        &branch_runs,
                        branch_count_can_include_trailer,
                        budget,
                    ) {
                        keep_best_chain(
                            &mut best,
                            chain_score(&loads, &bus_shunts, &branches, &generators),
                            checked_network(
                                name_hint,
                                buses.clone(),
                                loads.clone(),
                                bus_shunts.clone(),
                                branches,
                                generators.clone(),
                            ),
                        );
                    }
                }
                // Path 2: a dedicated shunt table behind the generators,
                // merged with any bus tail shunts it does not already hold.
                for (shunts, s_end) in device_table_candidates(
                    bytes,
                    g_end..g_end.saturating_add(RESYNC_WINDOW),
                    &bus_ids,
                    read_shunt_record,
                    &shunt_runs,
                    48,
                    28,
                    budget,
                ) {
                    let Some(branches) = find_branch_table(
                        bytes,
                        s_end,
                        &bus_ids,
                        &bus_names,
                        &branch_runs,
                        branch_count_can_include_trailer,
                        budget,
                    ) else {
                        continue;
                    };
                    let mut shunts = shunts;
                    extend_unique_shunts(&mut shunts, &bus_shunts);
                    let score = chain_score(&loads, &shunts, &branches, &generators);
                    let net = checked_network(
                        name_hint,
                        buses.clone(),
                        loads.clone(),
                        shunts,
                        branches,
                        generators.clone(),
                    );
                    keep_best_chain(&mut best, score, net);
                }
                // Path 3: no dedicated shunt table; the branch table follows
                // the generators directly.
                // NOTE(review): when `bus_shunts` is non empty this repeats
                // path 1 with identical arguments, and an equal score never
                // replaces the incumbent, so the second attempt looks
                // redundant there. Confirm `find_branch_table` is
                // deterministic for a given offset before gating it on
                // `bus_shunts.is_empty()`.
                if let Some(branches) = find_branch_table(
                    bytes,
                    g_end,
                    &bus_ids,
                    &bus_names,
                    &branch_runs,
                    branch_count_can_include_trailer,
                    budget,
                ) {
                    keep_best_chain(
                        &mut best,
                        chain_score(&loads, &bus_shunts, &branches, &generators),
                        checked_network(
                            name_hint,
                            buses.clone(),
                            loads.clone(),
                            bus_shunts.clone(),
                            branches,
                            generators.clone(),
                        ),
                    );
                }
            }
        }
        // The first bus table candidate with any completed chain ends the
        // search; later bus candidates are never compared against it.
        if let Some((_, net)) = best {
            return Some(net);
        }
    }
    None
}
494
495/// Keep the table chain with the largest decoded electrical core.
496fn keep_best_chain(
497    best: &mut Option<(usize, Result<Network>)>,
498    score: usize,
499    net: Result<Network>,
500) {
501    let candidate_ok = net.is_ok();
502    let replace = match best.as_ref() {
503        None => true,
504        Some((best_score, best_net)) => match (best_net.is_ok(), candidate_ok) {
505            (false, true) => true,
506            (true, false) => false,
507            _ => score > *best_score,
508        },
509    };
510    if replace {
511        *best = Some((score, net));
512    }
513}
514
515/// Score a candidate table chain by decoded element count.
516fn chain_score(
517    loads: &[Load],
518    shunts: &[Shunt],
519    branches: &[Branch],
520    generators: &[Generator],
521) -> usize {
522    loads.len() + shunts.len() + branches.len() + generators.len()
523}
524
525/// Add bus tail shunts without duplicating the dedicated shunt table rows.
526fn extend_unique_shunts(shunts: &mut Vec<Shunt>, extra: &[Shunt]) {
527    for shunt in extra {
528        if !shunts.iter().any(|existing| {
529            existing.bus == shunt.bus
530                && (existing.g - shunt.g).abs() <= 1e-9
531                && (existing.b - shunt.b).abs() <= 1e-9
532        }) {
533            shunts.push(shunt.clone());
534        }
535    }
536}
537
538/// Check whether another generator record starts soon after a candidate table.
539///
540/// This rejects short prefixes when a wrong count word points into the real
541/// generator table.
542fn gen_table_continues(
543    bytes: &[u8],
544    after: usize,
545    bus_ids: &BusIdSet,
546    variants: GenVariants,
547    budget: &SearchBudget,
548) -> bool {
549    (after..after.saturating_add(RESYNC_WINDOW).min(bytes.len()))
550        .take_while(|_| budget.tick())
551        .any(|p| {
552            (variants.plain && read_gen_record(bytes, p, bus_ids).is_ok())
553                || (variants.reg && read_gen_reg_record(bytes, p, bus_ids).is_ok())
554                || (variants.simple_reg && read_gen_reg_simple_record(bytes, p, bus_ids).is_ok())
555        })
556}
557
558/// Assemble the decoded tables and run the common reference checks.
559fn checked_network(
560    name_hint: Option<&str>,
561    mut buses: Vec<Bus>,
562    loads: Vec<Load>,
563    shunts: Vec<Shunt>,
564    branches: Vec<Branch>,
565    generators: Vec<Generator>,
566) -> Result<Network> {
567    derive_bus_kinds(&mut buses, &generators);
568    let net = Network {
569        name: name_hint.unwrap_or("case").to_string(),
570        base_mva: MVA_BASE,
571        base_frequency: crate::network::DEFAULT_BASE_FREQUENCY,
572        buses,
573        loads,
574        shunts,
575        branches,
576        switches: Vec::new(),
577        generators,
578        storage: Vec::new(),
579        hvdc: Vec::new(),
580        transformers_3w: Vec::new(),
581        areas: Vec::new(),
582        solver: None,
583        source_format: SourceFormat::PowerWorldBinary,
584        source: None,
585    };
586    net.check_references(FMT).map(|()| net)
587}
588
589// ---- Cursor -----------------------------------------------------------------
590
591/// Bounds checked cursor for little endian record probes.
592struct Cur<'a> {
593    b: &'a [u8],
594    pos: usize,
595}
596
597impl<'a> Cur<'a> {
598    /// Take `n` bytes and advance the cursor.
599    fn take(&mut self, n: usize) -> Probe<&'a [u8]> {
600        let end = self.pos.checked_add(n).ok_or("truncated record")?;
601        let s = self.b.get(self.pos..end).ok_or("truncated record")?;
602        self.pos = end;
603        Ok(s)
604    }
605
606    /// Read one byte.
607    fn u8(&mut self) -> Probe<u8> {
608        Ok(self.take(1)?[0])
609    }
610    /// Read a little endian u16.
611    fn u16(&mut self) -> Probe<u16> {
612        Ok(u16::from_le_bytes(self.take(2)?.try_into().unwrap()))
613    }
614    /// Read a little endian u32.
615    fn u32(&mut self) -> Probe<u32> {
616        Ok(u32::from_le_bytes(self.take(4)?.try_into().unwrap()))
617    }
618    /// Read a little endian f32 and widen to f64.
619    fn f32(&mut self) -> Probe<f64> {
620        Ok(f64::from(f32::from_le_bytes(
621            self.take(4)?.try_into().unwrap(),
622        )))
623    }
624    /// Read a little endian f64.
625    fn f64(&mut self) -> Probe<f64> {
626        Ok(f64::from_le_bytes(self.take(8)?.try_into().unwrap()))
627    }
628
629    /// A u32 length prefixed string of printable ASCII, at most `max` bytes.
630    /// Returns the raw slice; accepted records convert it once, so the
631    /// rejected probe offsets (the overwhelming majority) never allocate.
632    fn string(&mut self, max: usize) -> Probe<&'a [u8]> {
633        let n = self.u32()? as usize;
634        if n > max {
635            return Err("string length exceeds the field maximum");
636        }
637        let s = self.take(n)?;
638        if !printable(s) {
639            return Err("string has non printable bytes");
640        }
641        Ok(s)
642    }
643
644    /// A Pascal ShortString (one length byte), printable, at most `max` bytes.
645    fn short_string(&mut self, max: usize) -> Probe<&'a [u8]> {
646        let n = self.u8()? as usize;
647        if n > max {
648            return Err("device ID length exceeds the field maximum");
649        }
650        let s = self.take(n)?;
651        if !printable(s) {
652            return Err("device ID has non printable bytes");
653        }
654        Ok(s)
655    }
656
657    /// A fixed capacity Delphi `string[2]`: one length byte plus a fixed two
658    /// byte text area (a one character value leaves the second byte unused).
659    /// Branch circuit IDs and generator IDs are stored this way; the fixed
660    /// capacity was established by the v19 file's parallel circuit records.
661    fn short_string_2(&mut self) -> Probe<&'a [u8]> {
662        let n = self.u8()? as usize;
663        if n == 0 || n > 2 {
664            return Err("fixed capacity ID length not 1 or 2");
665        }
666        let text = self.take(2)?;
667        if !printable(&text[..n]) {
668            return Err("fixed capacity ID has non printable bytes");
669        }
670        Ok(&text[..n])
671    }
672}
673
674/// How far a bit 4 record's tail blob may push the next record: the largest
675/// observed blob is 406 KiB (an ACTIVSg500 branch record, see
676/// `powerio/src/format/powerworld/FORMAT.md`), so four MiB
677/// is an order of magnitude of headroom
678/// while bounding what a crafted file can make the scan walk per record.
679const BLOB_WINDOW: usize = 4 << 20;
680
681/// How far the scan for the next record may look past `after`: one bounded
682/// window normally, the blob window when the preceding record's flag bit 4
683/// inserted a count prefixed list (the 2019+ era branch blobs run to 406 KiB
684/// and the 2030 build's bus lists past one window; the record head gauntlets
685/// keep blob bytes from forging a record).
686fn resync_end(b: &[u8], after: usize, prev_bit4: bool) -> usize {
687    if prev_bit4 {
688        after.saturating_add(BLOB_WINDOW).min(b.len())
689    } else {
690        after.saturating_add(RESYNC_WINDOW).min(b.len())
691    }
692}
693
/// True when every byte of a probed string is printable ASCII, space
/// (0x20) through tilde (0x7e). An empty slice is printable.
fn printable(s: &[u8]) -> bool {
    s.iter().all(|byte| matches!(byte, b' '..=b'~'))
}
698
/// Borrow `n` bytes starting at absolute offset `at`, or `None` when the
/// span overflows or runs past the end of `b`.
fn slice_at(b: &[u8], at: usize, n: usize) -> Option<&[u8]> {
    let end = at.checked_add(n)?;
    b.get(at..end)
}
703
704/// Add an absolute offset without wrapping.
705fn checked_offset(at: usize, add: usize) -> Probe<usize> {
706    at.checked_add(add).ok_or("truncated record")
707}
708
/// Reject impossible count words before walking a table.
///
/// Returns `true` only when `count` records of at least `min_record_len`
/// bytes each fit between offset `first` and the end of `b`. An offset past
/// the end, or a byte total that overflows, does not fit.
fn count_fits(b: &[u8], first: usize, count: usize, min_record_len: usize) -> bool {
    if first > b.len() {
        return false;
    }
    let remaining = b.len() - first;
    match count.checked_mul(min_record_len) {
        Some(min_bytes) => min_bytes <= remaining,
        None => false,
    }
}
718
719/// Read a little endian u32 at an absolute offset.
720fn u32_at(b: &[u8], at: usize) -> Probe<u32> {
721    slice_at(b, at, 4)
722        .and_then(|s| <[u8; 4]>::try_from(s).ok())
723        .map(u32::from_le_bytes)
724        .ok_or("truncated record")
725}
726
727/// Read a little endian f32 at an absolute offset and widen to f64.
728fn f32_at(b: &[u8], at: usize) -> Probe<f64> {
729    slice_at(b, at, 4)
730        .and_then(|s| <[u8; 4]>::try_from(s).ok())
731        .map(f32::from_le_bytes)
732        .map(f64::from)
733        .ok_or("truncated record")
734}
735
736/// Read a length prefixed printable ASCII string at an absolute offset.
737fn string_at(b: &[u8], at: usize, max: usize) -> Probe<String> {
738    let n = u32_at(b, at)? as usize;
739    if n > max {
740        return Err("string length exceeds the field maximum");
741    }
742    let s = slice_at(b, checked_offset(at, 4)?, n).ok_or("truncated record")?;
743    if !printable(s) {
744        return Err("string has non printable bytes");
745    }
746    Ok(String::from_utf8_lossy(s).into_owned())
747}
748
749/// Validate the file head and return the writer format constant (the u64 at
750/// offset 0x08) for the layout keying in [`parse_pwb`].
751fn expect_header(b: &[u8]) -> Result<u64> {
752    const DECODED: [u64; 9] = [338, 368, 425, 483, 508, 537, 550, 551, 554];
753    let bad = || Error::FormatRead {
754        format: FMT,
755        message: "not a recognized PowerWorld binary case (header magic mismatch); \
756                  only the validated .pwb layouts are read"
757            .into(),
758    };
759    if b.len() < 0x40 {
760        return Err(bad());
761    }
762    let word = |i: usize| u64::from_le_bytes(b[i * 8..i * 8 + 8].try_into().unwrap());
763    let (a, v, c) = (word(0), word(1), word(2));
764    if a != 15000 {
765        return Err(bad());
766    }
767    // Every known PowerWorld binary starts with 15000. The next two words
768    // identify the writer family: the decoded constants cover older 0x06 bus
769    // records (338/368), the Simulator 19/20/current 425 family, the 2021
770    // regulated generator family (483/537/550/551), the mixed 508 saves, and
771    // the 554 regulated generator variant. Header admission is not trust:
772    // every table still has to pass the record probes below.
773    if c != 20 || !DECODED.contains(&v) {
774        return Err(unsupported_vintage(format!(
775            "header format words ({v}, {c}); the decoded eras are \
776             338/368/425/483/508/537/550/551/554 with 20"
777        )));
778    }
779    Ok(v)
780}
781
782/// Reject files whose leading 64 KiB carries no run of validated bus record
783/// heads before the table search reaches a generic "no chain" error. The
784/// decoded bus head families share enough structure that fewer than two
785/// validated heads in this window means an unrecognized body layout, not a
786/// sparse case.
787fn reject_unsupported_vintage(b: &[u8]) -> Result<()> {
788    let scan = b.len().min(0x10000).saturating_sub(8);
789    let mut heads = 0usize;
790    let mut at = 0x20;
791    while at < scan {
792        let Ok((_, after)) = read_bus_head(b, at) else {
793            at += 1;
794            continue;
795        };
796        heads += 1;
797        if heads >= 2 {
798            return Ok(());
799        }
800        at = after;
801    }
802    Err(unsupported_vintage(
803        "no recognized bus record layout in the leading 64 KiB",
804    ))
805}
806
807/// The single rejection path for recognized-but-undecoded writer vintages;
808/// every message names the detected evidence and points at the docs.
809fn unsupported_vintage(detail: impl std::fmt::Display) -> Error {
810    Error::FormatRead {
811        format: FMT,
812        message: format!(
813            "unsupported PowerWorld .pwb vintage: {detail}; only the validated \
814             338/368/425/483/508/537/550/551/554 layouts are decoded \
815             (see powerio/src/format/powerworld/FORMAT.md)"
816        ),
817    }
818}
819
820// ---- Search machinery --------------------------------------------------------
821
822/// Bus id membership for the record probes, the hottest check in the table
823/// search (every probed byte offset starts with one or two lookups). A
824/// bitmap over the id range replaces hashing; [`read_bus_head`] caps ids at
825/// 99,999,999 and the corpus tops out around 790,000, but a forged
826/// candidate can pair a tiny count with an id near the cap, so tables whose
827/// id range dwarfs their count fall back to a sorted list instead of
828/// allocating megabytes per forged candidate.
829enum BusIdSet {
830    Bitmap(Vec<u64>),
831    Sparse(Vec<usize>),
832}
833
834impl BusIdSet {
835    /// `None` when an id repeats: a table with duplicate bus numbers is a
836    /// forged candidate, not a real bus table.
837    fn new(buses: &[Bus]) -> Option<Self> {
838        let max = buses.iter().map(|b| b.id.0).max().unwrap_or(0);
839        let words = max / 64 + 1;
840        if words > (buses.len() * 4).max(1024) {
841            let mut ids: Vec<usize> = buses.iter().map(|b| b.id.0).collect();
842            ids.sort_unstable();
843            if ids.windows(2).any(|w| w[0] == w[1]) {
844                return None;
845            }
846            return Some(Self::Sparse(ids));
847        }
848        let mut bits = vec![0u64; words];
849        for bus in buses {
850            let (w, bit) = (bus.id.0 / 64, 1u64 << (bus.id.0 % 64));
851            if bits[w] & bit != 0 {
852                return None;
853            }
854            bits[w] |= bit;
855        }
856        Some(Self::Bitmap(bits))
857    }
858
859    /// Check whether a decoded bus id exists.
860    #[inline]
861    fn contains(&self, id: usize) -> bool {
862        match self {
863            Self::Bitmap(words) => words
864                .get(id / 64)
865                .is_some_and(|w| w & (1 << (id % 64)) != 0),
866            Self::Sparse(ids) => ids.binary_search(&id).is_ok(),
867        }
868    }
869}
870
871/// Build an uppercase bus name index for records that point by name.
872fn bus_name_map(buses: &[Bus]) -> HashMap<String, BusId> {
873    buses
874        .iter()
875        .filter_map(|bus| {
876            bus.name
877                .as_ref()
878                .map(|name| (name.trim().to_ascii_uppercase(), bus.id))
879        })
880        .collect()
881}
882
/// The chain of records walked from one first record offset.
///
/// The walk from a given offset is unique, so every count word candidate
/// that points at the same first record shares one `Run`. A count that is a
/// prefix of what was already walked reuses the stored boundaries; a count
/// beyond the point where extension failed is rejected without rescanning.
struct Run<T> {
    /// The records walked so far, in file order.
    items: Vec<T>,
    /// `ends[i]` is the offset just past `items[i]`.
    ends: Vec<usize>,
    /// Set once extension past `items.len()` has failed; never retried.
    dead: bool,
}

impl<T: Clone> Run<T> {
    /// Open a run holding only its first validated record, which ends at
    /// `end`.
    fn start(item: T, end: usize) -> Self {
        Self {
            items: vec![item],
            ends: vec![end],
            dead: false,
        }
    }

    /// Grow the run to `count` records if the bytes allow and return a copy
    /// of the first `count` records with the offset just past the last one.
    ///
    /// Each missing record is located by `next(after, prev)`, where `after`
    /// is the end of the last stored record and `prev` that record (the
    /// record tails are undecoded and vary, so each step is a bounded scan).
    /// The first `None` from `next` marks the run dead. Returns `None` for a
    /// zero `count` and when the run cannot reach `count` records.
    fn prefix(
        &mut self,
        count: usize,
        mut next: impl FnMut(usize, &T) -> Option<(T, usize)>,
    ) -> Option<(Vec<T>, usize)> {
        if count == 0 {
            return None; // the candidate scans filter zero counts out
        }
        while self.items.len() < count && !self.dead {
            let resume = *self.ends.last().expect("a run holds its first record");
            let prev = self.items.last().expect("a run holds its first record");
            if let Some((item, end)) = next(resume, prev) {
                self.items.push(item);
                self.ends.push(end);
            } else {
                self.dead = true;
            }
        }
        let head = self.items.get(..count)?;
        Some((head.to_vec(), self.ends[count - 1]))
    }
}
931
932// ---- Bus table --------------------------------------------------------------
933
934struct BusHead {
935    bus: Bus,
936    shunt: Option<Shunt>,
937    /// The flags u32 between name and nominal kV: a Delphi field presence
938    /// bitmask, not a per file constant. Bit 5 set marks the Simulator 20
939    /// era record family (clear on the Simulator 19 era 0x06/0x07 family,
940    /// whose tails are shorter), bit 4 set marks a count prefixed list in
941    /// the record tail (2016/2017 era exports and the 2030 build), bit 0
942    /// clear means one extra u16 sits before the nominal kV (observed on
943    /// generator buses). The 2019+ era writers add bits 6 and 8, both per
944    /// record (the v21 resave clears bit 6 on its slack bus record; the
945    /// bit 6 tails carry a location string block), with their fields in
946    /// the undecoded tail.
947    unk: u32,
948}
949
/// Whether a bus record flag word is one this reader decodes: the base bits
/// `0x06` plus any combination of the admitted presence bits 0, 4, 5, 6, 8,
/// 10, 12, and 13. Bit 5 changes the tail family (`0x06` vs `0x26` era),
/// while bits 6, 8, 10, 12, and 13 were admitted only after full table walks
/// showed they leave the decoded head layout unchanged.
fn known_bus_flags(unk: u32) -> bool {
    const BASE: u32 = 0x06;
    // Same value as the mask 0x3571, spelled out bit by bit.
    const OPTIONAL: u32 = 1 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 8 | 1 << 10 | 1 << 12 | 1 << 13;
    unk & !OPTIONAL == BASE
}
958
/// Reduce a bus flag word to its record family key by clearing the bits
/// that vary per record (0, 4, 6, 8, 10, 12, and 13). One bus table cannot
/// mix tail families, but individual records can toggle optional presence
/// bits inside a family. Bit 5 stays in the key because the 0x06 and 0x26
/// era tails differ; the cleared bits are per record fields or skipped
/// tails.
fn bus_family(unk: u32) -> u32 {
    // Same value as the mask 0x3551, spelled out bit by bit.
    const PER_RECORD: u32 = 1 | 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10 | 1 << 12 | 1 << 13;
    unk & !PER_RECORD
}
966
967/// The bus run cache: keyed by first record offset, each entry carrying the
968/// walked `(bus, flag word)` records and the table's family bits.
969type BusRunItem = (Bus, u32, Option<Shunt>);
970type BusRuns = HashMap<usize, (Run<BusRunItem>, u32)>;
971
972/// Bus table candidates: each `(count, glue)` position after the header whose
973/// record walk succeeds, in scan order, yielding the records, the offset
974/// past the last decoded head, and the last record's flag word (the load
975/// table seam needs its bit 4). The caller validates each candidate by
976/// parsing the tables that must follow it.
977fn bus_table_candidates<'a>(
978    b: &'a [u8],
979    runs: &'a RefCell<BusRuns>,
980    wide_glue: bool,
981    budget: &'a SearchBudget,
982) -> impl Iterator<Item = (Vec<Bus>, Vec<Shunt>, usize, u32)> + 'a {
983    let limit = b.len().saturating_sub(4).min(0x10000);
984    (0x20..limit)
985        .take_while(move |_| !budget.exhausted())
986        .flat_map(move |at| {
987            let count = u32::from_le_bytes(b[at..at + 4].try_into().unwrap()) as usize;
988            // Table glue between the count and the first record varies by a few
989            // bytes per table and vintage; scan a small window for the record.
990            // The v21 resave's bus glue runs 52 bytes, past the 48 every other
991            // export observes; the first search pass widens the window only for
992            // large counts (every observed wide glue table is a node level
993            // resave with thousands of buses, and forged count words are
994            // overwhelmingly small values, so widening their window prices
995            // every file). The retry pass covers exactly the complement (the
996            // wide glues for small counts), so with the shared run cache the
997            // two passes together cost one full sweep.
998            let glues = if wide_glue {
999                (count != 0 && count < 256).then_some(49..=96)
1000            } else {
1001                let max_glue = if count >= 256 { 96 } else { 48 };
1002                (count != 0 && count <= 2_000_000).then_some(0..=max_glue)
1003            };
1004            glues
1005                .into_iter()
1006                .flatten()
1007                .filter_map(move |glue| {
1008                    if !budget.tick() {
1009                        return None;
1010                    }
1011                    let first = at.checked_add(4)?.checked_add(glue)?;
1012                    count_fits(b, first, count, 32)
1013                        .then(|| bus_run(b, runs, first, count, budget))
1014                        .flatten()
1015                })
1016                .map(|(heads, end)| {
1017                    let last_unk = heads.last().map_or(0, |(_, unk, _)| *unk);
1018                    let shunts = heads
1019                        .iter()
1020                        .filter_map(|(bus, _, shunt)| {
1021                            shunt.clone().map(|mut shunt| {
1022                                shunt.bus = bus.id;
1023                                shunt
1024                            })
1025                        })
1026                        .collect();
1027                    (
1028                        heads.into_iter().map(|(bus, _, _)| bus).collect(),
1029                        shunts,
1030                        end,
1031                        last_unk,
1032                    )
1033                })
1034        })
1035}
1036
1037/// The bus record run from `first`, extended to `count` records if the bytes
1038/// allow. The run remembers the first record's family: one file's bus table
1039/// never mixes families, so the scan for each next record skips heads of the
1040/// other family (see [`bus_family`]). The items keep their flag words: the
1041/// scan window for the next record depends on the preceding record's bit 4,
1042/// as in the branch run.
1043fn bus_run(
1044    b: &[u8],
1045    runs: &RefCell<BusRuns>,
1046    first: usize,
1047    count: usize,
1048    budget: &SearchBudget,
1049) -> Option<(Vec<BusRunItem>, usize)> {
1050    let mut map = runs.borrow_mut();
1051    let (run, family) = match map.entry(first) {
1052        Entry::Occupied(e) => e.into_mut(),
1053        // A failed head parse is not cached: the table search probes far
1054        // more offsets than it accepts, and the probe itself is cheaper
1055        // than a map entry.
1056        Entry::Vacant(e) => {
1057            let (head, end) = read_bus_head(b, first).ok()?;
1058            let family = bus_family(head.unk);
1059            e.insert((Run::start((head.bus, head.unk, head.shunt), end), family))
1060        }
1061    };
1062    let family = *family;
1063    run.prefix(count, |after, prev| {
1064        // The record tail (undecoded; longer when flag bit 4 inserts a
1065        // count prefixed list) separates this record from the next; find
1066        // the next head by bounded scan (see resync_end).
1067        (after..resync_end(b, after, prev.1 & 0x10 != 0))
1068            .take_while(|_| budget.tick())
1069            .find_map(|p| {
1070                read_bus_head(b, p)
1071                    .ok()
1072                    .filter(|(h, _)| bus_family(h.unk) == family)
1073                    .map(|(h, end)| ((h.bus, h.unk, h.shunt), end))
1074            })
1075    })
1076}
1077
1078/// Parse one bus record head at `at`; everything through the voltage angle.
1079/// Header 338 and some small header 425 saves omit the balancing authority
1080/// field between zone and label. Returns the parsed bus and leaves undecoded
1081/// tail bytes, including the bit 4 list, to the resync.
1082fn read_bus_head(b: &[u8], at: usize) -> Probe<(BusHead, usize)> {
1083    let mut c = Cur { b, pos: at };
1084    let num = c.u32()? as usize;
1085    if num == 0 || num > 99_999_999 {
1086        return Err("implausible bus number");
1087    }
1088    let name_len = c.u32()? as usize;
1089    if name_len == 0 {
1090        return Err("empty bus name");
1091    }
1092    if name_len > 64 {
1093        return Err("string length exceeds the field maximum");
1094    }
1095    let name = c.take(name_len)?;
1096    // The flag mask (a handful of admitted words out of 2^32) is far more
1097    // selective than the name text scan, so it gates first; the accept set
1098    // is unchanged, only the rejection order.
1099    let unk = c.u32()?;
1100    if !known_bus_flags(unk) {
1101        return Err("bus record flags not in the validated set");
1102    }
1103    if !printable(name) {
1104        return Err("string has non printable bytes");
1105    }
1106    if unk & 1 == 0 {
1107        let _extra = c.u16()?;
1108    }
1109    let kv = c.f32()?;
1110    if !kv.is_finite() || !(0.0..=10_000.0).contains(&kv) {
1111        return Err("implausible nominal kV");
1112    }
1113    let area = c.u32()? as usize;
1114    let zone = c.u32()? as usize;
1115    if area > 100_000_000 || zone > 100_000_000 {
1116        return Err("implausible area/zone/BA number");
1117    }
1118    let after_zone = c.pos;
1119    let mut with_ba = Cur { b, pos: after_zone };
1120    let with_ba_result = (|| -> Probe<(f64, f64)> {
1121        let ba = with_ba.u32()?;
1122        if ba > 100_000_000 {
1123            return Err("implausible area/zone/BA number");
1124        }
1125        read_bus_label_and_solution(&mut with_ba)
1126    })();
1127    let (vm, va_rad) = if let Ok(solution) = with_ba_result {
1128        c.pos = with_ba.pos;
1129        solution
1130    } else {
1131        let mut old = Cur { b, pos: after_zone };
1132        let solution = read_bus_label_and_solution(&mut old)?;
1133        c.pos = old.pos;
1134        solution
1135    };
1136    if !vm.is_finite() || !(0.0..=10.0).contains(&vm) || !va_rad.is_finite() || va_rad.abs() > 100.0
1137    {
1138        return Err("implausible voltage solution");
1139    }
1140    let bus = Bus {
1141        id: BusId(num),
1142        kind: BusType::Pq,
1143        vm,
1144        va: va_rad.to_degrees(),
1145        base_kv: kv,
1146        vmax: 1.1,
1147        vmin: 0.9,
1148        evhi: None,
1149        evlo: None,
1150        area,
1151        zone,
1152        name: Some(String::from_utf8_lossy(name).into_owned()),
1153        uid: None,
1154        extras: Extras::new(),
1155    };
1156    let shunt = bus_tail_shunt(b, c.pos, BusId(num));
1157    Ok((BusHead { bus, shunt, unk }, c.pos))
1158}
1159
1160/// Decode the optional fixed shunt stored in some bus record tails.
1161fn bus_tail_shunt(b: &[u8], after_head: usize, bus: BusId) -> Option<Shunt> {
1162    let g_pu = b
1163        .get(after_head.checked_add(1)?..after_head.checked_add(5)?)
1164        .and_then(|s| <[u8; 4]>::try_from(s).ok())
1165        .map(f32::from_le_bytes)
1166        .map(f64::from)
1167        .filter(|g| g.is_finite() && g.abs() <= 1.0e6)
1168        .unwrap_or(0.0);
1169    let b_pu = b
1170        .get(after_head.checked_add(5)?..after_head.checked_add(9)?)
1171        .and_then(|s| <[u8; 4]>::try_from(s).ok())
1172        .map(f32::from_le_bytes)
1173        .map(f64::from)?;
1174    if !b_pu.is_finite() || b_pu.abs() > 1.0e6 || (g_pu.abs() <= 1e-9 && b_pu.abs() <= 1e-9) {
1175        return None;
1176    }
1177    let mut extras = Extras::new();
1178    extras.insert(
1179        "ShuntID".into(),
1180        serde_json::Value::String("BusShunt".into()),
1181    );
1182    Some(Shunt {
1183        bus,
1184        g: g_pu * MVA_BASE,
1185        b: b_pu * MVA_BASE,
1186        in_service: true,
1187        control: None,
1188        uid: None,
1189        extras,
1190    })
1191}
1192
1193/// Read the bus label plus solved voltage magnitude and angle.
1194fn read_bus_label_and_solution(c: &mut Cur<'_>) -> Probe<(f64, f64)> {
1195    let _label = c.string(64)?;
1196    let vm = c.f64()?;
1197    let va_rad = c.f64()?;
1198    Ok((vm, va_rad))
1199}
1200
1201// ---- Device tables (loads, generators, shunts) -------------------------------
1202
1203/// One whole device record: parse at `at`, return the element and the offset
1204/// just past the decoded head (undecoded tail bytes are the resync scan's to
1205/// skip). One function per validated record layout. The bound is generic
1206/// rather than a `fn` pointer so each table's probe monomorphizes and the
1207/// early rejection checks inline into the resync scans, the hottest loops
1208/// in the search.
1209trait ReadRecord<T>: Fn(&[u8], usize, &BusIdSet) -> Probe<(T, usize)> + Copy {}
1210impl<T, F: Fn(&[u8], usize, &BusIdSet) -> Probe<(T, usize)> + Copy> ReadRecord<T> for F {}
1211
1212/// The bus + ShortString ID prefix the 425/508 era device records share.
1213/// `read` parses the rest of the record head at the cursor and returns the
1214/// element.
1215fn read_device_head<T>(
1216    b: &[u8],
1217    at: usize,
1218    bus_ids: &BusIdSet,
1219    read: fn(&mut Cur, BusId, &[u8]) -> Probe<T>,
1220) -> Probe<(T, usize)> {
1221    let mut c = Cur { b, pos: at };
1222    let bus = c.u32()? as usize;
1223    if !bus_ids.contains(bus) {
1224        return Err("record references an unknown bus");
1225    }
1226    let id = c.short_string(8)?;
1227    if id.is_empty() {
1228        return Err("empty device ID");
1229    }
1230    let v = read(&mut c, BusId(bus), id)?;
1231    Ok((v, c.pos))
1232}
1233
1234/// Probe one load record using the shared device head.
1235fn read_load_record(b: &[u8], at: usize, bus_ids: &BusIdSet) -> Probe<(Load, usize)> {
1236    read_device_head(b, at, bus_ids, read_load)
1237}
1238
1239/// Probe one plain generator record using the shared device head.
1240fn read_gen_record(b: &[u8], at: usize, bus_ids: &BusIdSet) -> Probe<(Generator, usize)> {
1241    read_device_head(b, at, bus_ids, read_gen)
1242}
1243
1244/// Probe one switched shunt record using the shared device head.
1245fn read_shunt_record(b: &[u8], at: usize, bus_ids: &BusIdSet) -> Probe<(Shunt, usize)> {
1246    read_device_head(b, at, bus_ids, read_shunt)
1247}
1248
1249/// Candidates for a count prefixed device table after `from`: every
1250/// `(count, glue)` whose full record walk succeeds, in scan order. The caller
1251/// keeps a candidate only if the tables that must follow it parse too.
1252#[expect(clippy::too_many_arguments)]
1253fn device_table_candidates<'a, T: Clone + 'a>(
1254    b: &'a [u8],
1255    scan: std::ops::Range<usize>,
1256    bus_ids: &'a BusIdSet,
1257    read: impl ReadRecord<T> + 'a,
1258    runs: &'a RefCell<HashMap<usize, Run<T>>>,
1259    max_glue: usize,
1260    min_record_len: usize,
1261    budget: &'a SearchBudget,
1262) -> impl Iterator<Item = (Vec<T>, usize)> + 'a {
1263    let limit = scan.end.min(b.len().saturating_sub(4));
1264    (scan.start..limit)
1265        .take_while(move |_| !budget.exhausted())
1266        .flat_map(move |at| {
1267            let count = u32::from_le_bytes(b[at..at + 4].try_into().unwrap()) as usize;
1268            let glues = (count != 0 && count <= 10_000_000).then_some(0..=max_glue);
1269            glues.into_iter().flatten().filter_map(move |glue| {
1270                if !budget.tick() {
1271                    return None;
1272                }
1273                let first = at.checked_add(4)?.checked_add(glue)?;
1274                count_fits(b, first, count, min_record_len)
1275                    .then(|| device_run(b, runs, first, count, bus_ids, read, budget))
1276                    .flatten()
1277            })
1278        })
1279}
1280
1281/// The device record run from `first`, extended to `count` records if the
1282/// bytes allow (see [`Run`]).
1283fn device_run<T: Clone>(
1284    b: &[u8],
1285    runs: &RefCell<HashMap<usize, Run<T>>>,
1286    first: usize,
1287    count: usize,
1288    bus_ids: &BusIdSet,
1289    read: impl ReadRecord<T>,
1290    budget: &SearchBudget,
1291) -> Option<(Vec<T>, usize)> {
1292    let mut map = runs.borrow_mut();
1293    let run = match map.entry(first) {
1294        Entry::Occupied(e) => e.into_mut(),
1295        // A failed head parse is not cached, as in the sibling run lookups.
1296        Entry::Vacant(e) => {
1297            let (item, end) = read(b, first, bus_ids).ok()?;
1298            e.insert(Run::start(item, end))
1299        }
1300    };
1301    run.prefix(count, |after, _| {
1302        // The undecoded record tail separates this record from the next.
1303        (after..after.saturating_add(RESYNC_WINDOW).min(b.len()))
1304            .take_while(|_| budget.tick())
1305            .find_map(|p| read(b, p, bus_ids).ok())
1306    })
1307}
1308
1309/// Load record: one undecoded byte, then constant power P and Q in per unit
1310/// (f32). The byte is 0x00 in every 425 era record and 0x01 in every 483
1311/// era one while both auxes say every load is Closed, so it is not a status
1312/// byte; loads read as in service (see the module docs).
1313fn read_load(c: &mut Cur, bus: BusId, id: &[u8]) -> Probe<Load> {
1314    let record_start = c.pos - (4 + 1) - id.len(); // u32 bus + the ID length byte
1315    let flag = c.u8()?;
1316    if flag > 1 {
1317        return Err("load status byte not in the validated set");
1318    }
1319    let mut p = c.f32()? * MVA_BASE;
1320    let mut q = c.f32()? * MVA_BASE;
1321    let mut in_service = true;
1322    if flag == 0 && p.abs() < 1e-30 && q.abs() < 1e-30 {
1323        let early_p = f32_at(c.b, checked_offset(record_start, 25)?)? * MVA_BASE;
1324        let early_q = f32_at(c.b, checked_offset(record_start, 29)?)? * MVA_BASE;
1325        let late_p = f32_at(c.b, checked_offset(record_start, 33)?)? * MVA_BASE;
1326        let late_q = f32_at(c.b, checked_offset(record_start, 37)?)? * MVA_BASE;
1327        let early_is_marker = (early_p - MVA_BASE).abs() <= 1e-6 && early_q.abs() <= 1e-30;
1328        let (alt_p, alt_q, end) = if early_is_marker {
1329            (late_p, late_q, checked_offset(record_start, 41)?)
1330        } else {
1331            (early_p, early_q, checked_offset(record_start, 33)?)
1332        };
1333        if alt_p.abs() > 1e-30 || alt_q.abs() > 1e-30 {
1334            p = alt_p;
1335            q = alt_q;
1336            in_service = !early_is_marker;
1337        }
1338        c.pos = c.pos.max(end);
1339    }
1340    if !p.is_finite() || !q.is_finite() || p.abs() > 1.0e6 || q.abs() > 1.0e6 {
1341        return Err("implausible load power");
1342    }
1343    let mut extras = Extras::new();
1344    extras.insert(
1345        "LoadID".into(),
1346        serde_json::Value::String(String::from_utf8_lossy(id).into_owned()),
1347    );
1348    Ok(Load {
1349        bus,
1350        p,
1351        q,
1352        voltage_model: None,
1353        in_service,
1354        uid: None,
1355        extras,
1356    })
1357}
1358
1359/// Generator record: the ID is a fixed capacity ShortString[2] (so the
1360/// payload sits at constant offsets from the record start), undecoded flag
1361/// bytes, then eight consecutive f32s: MW setpoint, MVAr setpoint, MVAr
1362/// max, MVAr min (per unit), voltage setpoint (p.u.), MVA base, MW max, MW
1363/// min (per unit). The f32 block starts at +9 or +10 in the 2016/2017
1364/// exports (the flag bytes before it vary per record) and +11 in the 2018
1365/// one; the voltage setpoint and MVA base ranges anchor the choice, and a
1366/// record that puts implausible values at every offset is a loud error, not
1367/// a generator.
1368fn read_gen(c: &mut Cur, bus: BusId, id: &[u8]) -> Probe<Generator> {
1369    let record_start = c.pos - (4 + 1) - id.len(); // u32 bus + the ID length byte
1370    let mut chosen = None;
1371    // +12 extends the observed set to two character IDs in a 2018 era
1372    // export (unobserved, but the pre-rework probe covered it).
1373    for anchor in [9usize, 10, 11, 12] {
1374        let mut probe = Cur {
1375            b: c.b,
1376            pos: checked_offset(record_start, anchor)?,
1377        };
1378        if let Ok(vals) = read_gen_f32_block(&mut probe) {
1379            chosen = Some((vals, probe.pos));
1380            break;
1381        }
1382    }
1383    let Some((v, end)) = chosen else {
1384        return Err("generator record does not match the validated layouts");
1385    };
1386    c.pos = end;
1387    // The status byte is unlocated within the flag bytes of this era's
1388    // record; every available machine is Closed (see the module docs).
1389    Ok(gen_from_block(bus, &v, true))
1390}
1391
1392/// The eight consecutive f32 per unit values both generator record eras
1393/// share: MW setpoint, MVAr setpoint, MVRMax, MVRMin, GenVoltSet, GenMVABase,
1394/// MWMax, MWMin. The voltage setpoint and MVA base ranges anchor the layout;
1395/// a block that fails them is not a generator record.
1396fn read_gen_f32_block(c: &mut Cur) -> Probe<[f64; 8]> {
1397    let mut v = [0.0f64; 8];
1398    // Each slot checks as it reads, and the two anchor ranges right after
1399    // their slots, so a forged offset stops within a few reads instead of
1400    // always paying all eight; same predicates, same accept set.
1401    for (i, slot) in v.iter_mut().enumerate() {
1402        let x = c.f32()?;
1403        if !x.is_finite() || x.abs() >= 1.0e6 {
1404            return Err("generator record does not match the validated layouts");
1405        }
1406        if (i == 4 && !(0.5..=1.6).contains(&x)) || (i == 5 && !(0.1..=1.0e5).contains(&x)) {
1407            return Err("generator record does not match the validated layouts");
1408        }
1409        *slot = x;
1410    }
1411    Ok(v)
1412}
1413
1414/// A [`Generator`] from the shared f32 block (see [`read_gen_f32_block`]).
1415fn gen_from_block(bus: BusId, v: &[f64; 8], in_service: bool) -> Generator {
1416    Generator {
1417        bus,
1418        pg: v[0] * MVA_BASE,
1419        qg: v[1] * MVA_BASE,
1420        qmax: v[2] * MVA_BASE,
1421        qmin: v[3] * MVA_BASE,
1422        vg: v[4],
1423        mbase: v[5],
1424        pmax: v[6] * MVA_BASE,
1425        pmin: v[7] * MVA_BASE,
1426        in_service,
1427        cost: None,
1428        caps: Default::default(),
1429        regulated_bus: None,
1430        uid: None,
1431    }
1432}
1433
1434/// 2021 era generator record (header constant 483, the Texas7k export),
1435/// validated against all 731 machines of the same day aux: u32 terminal
1436/// bus, u32 regulated bus (the inserted field that distinguishes this
1437/// layout; on plants regulating a remote bus the two differ, which is what
1438/// made the older record model misread until the boundary was re-fit), a
1439/// fixed capacity ShortString[2] ID, a constant 0x01 byte, one undecoded
1440/// byte, then a presence byte whose bit 0 inserts an f32 and bit 1 one
1441/// byte, then the same eight f32 block as the older eras. One past the
1442/// block sit a zero byte and the status byte: bit 0 is the in service bit,
1443/// validated against the aux's 637 Closed and 94 Open machines (the
1444/// corpus's first out of service devices). The f32 after it reads as
1445/// GenRMPCT in the aux (100.0 on every record) and anchors the layout.
1446fn read_gen_reg_record(b: &[u8], at: usize, bus_ids: &BusIdSet) -> Probe<(Generator, usize)> {
1447    let mut c = Cur { b, pos: at };
1448    let bus = c.u32()? as usize;
1449    if !bus_ids.contains(bus) {
1450        return Err("record references an unknown bus");
1451    }
1452    let reg = c.u32()? as usize;
1453    if !bus_ids.contains(reg) {
1454        return Err("regulated bus is not a known bus");
1455    }
1456    let _id = c.short_string_2()?;
1457    if c.u8()? != 1 {
1458        return Err("generator record lead byte not 1");
1459    }
1460    let _ = c.u8()?; // varies per record (7 through 37 observed); undecoded
1461    // Presence byte: bit 0 inserts an f32, bit 1 one byte (both in the
1462    // 2021 export), bit 5 another f32 (the 2030 build); the eight f32
1463    // block follows whatever the bits insert.
1464    let pres = c.u8()?;
1465    if pres & !0x23 != 0 {
1466        return Err("generator presence byte not in the validated set");
1467    }
1468    if pres & 0x22 == 0x22 {
1469        // Bits 1 and 5 never co-occur in the corpus, so the order of their
1470        // inserted fields is unestablished; guessing it risks reading a
1471        // misaligned f32, so the combination rejects until a file shows it.
1472        return Err("generator presence bits 1 and 5 together are unobserved");
1473    }
1474    for bit in [0x01, 0x20] {
1475        if pres & bit != 0 {
1476            let v = c.f32()?;
1477            if !v.is_finite() || v.abs() > 1.0e6 {
1478                return Err("implausible presence gated generator value");
1479            }
1480        }
1481    }
1482    if pres & 2 != 0 {
1483        let _ = c.u8()?;
1484    }
1485    let v = read_gen_f32_block(&mut c)?;
1486    read_gen_reg_tail(&mut c, bus, &v)
1487}
1488
1489/// Header 554 regulated generator record: terminal bus, regulated bus,
1490/// fixed capacity ID, two zero bytes, then the shared f32 block and the
1491/// same status/RMPCT tail as [`read_gen_reg_record`].
1492fn read_gen_reg_simple_record(
1493    b: &[u8],
1494    at: usize,
1495    bus_ids: &BusIdSet,
1496) -> Probe<(Generator, usize)> {
1497    let mut c = Cur { b, pos: at };
1498    let bus = c.u32()? as usize;
1499    if !bus_ids.contains(bus) {
1500        return Err("record references an unknown bus");
1501    }
1502    let reg = c.u32()? as usize;
1503    if !bus_ids.contains(reg) {
1504        return Err("regulated bus is not a known bus");
1505    }
1506    let _id = c.short_string_2()?;
1507    if c.u8()? != 0 || c.u8()? != 0 {
1508        return Err("generator record separator bytes not zero");
1509    }
1510    let v = read_gen_f32_block(&mut c)?;
1511    read_gen_reg_tail(&mut c, bus, &v)
1512}
1513
1514/// Read the status and RMPCT tail shared by regulated generator records.
1515fn read_gen_reg_tail(c: &mut Cur<'_>, bus: usize, v: &[f64; 8]) -> Probe<(Generator, usize)> {
1516    if c.u8()? != 0 {
1517        return Err("generator record separator byte not zero");
1518    }
1519    let status = c.u8()?;
1520    if status & !0x01 != 0x08 {
1521        return Err("generator status byte not in the validated set");
1522    }
1523    let rmpct = c.f32()?;
1524    if !rmpct.is_finite() || !(0.0..=1000.0).contains(&rmpct) {
1525        return Err("implausible remote regulation percentage");
1526    }
1527    Ok((gen_from_block(BusId(bus), v, status & 1 == 1), c.pos))
1528}
1529
1530/// Shunt record: nominal MVAr as f32 at +24 from the record start, validated
1531/// on all 199 shunts across the three sibling cases. The slot at +20 is 0.0
1532/// in the Simulator 20 era files but 0.99 in the 2016 export, so it is not
1533/// the nominal MW (see the module docs); shunts read with `g = 0`.
1534fn read_shunt(c: &mut Cur, bus: BusId, id: &[u8]) -> Probe<Shunt> {
1535    let record_start = c.pos - (4 + 1) - id.len(); // u32 bus + the ID length byte
1536    let mut probe = Cur {
1537        b: c.b,
1538        pos: checked_offset(record_start, 24)?,
1539    };
1540    let b_mvar = probe.f32()? * MVA_BASE;
1541    if !b_mvar.is_finite() || b_mvar.abs() > 1.0e6 {
1542        return Err("implausible shunt MVAr");
1543    }
1544    c.pos = probe.pos;
1545    let mut extras = Extras::new();
1546    extras.insert(
1547        "ShuntID".into(),
1548        serde_json::Value::String(String::from_utf8_lossy(id).into_owned()),
1549    );
1550    Ok(Shunt {
1551        bus,
1552        // The nominal MW slot is unlocated (every available case stores
1553        // zero); see the module docs.
1554        g: 0.0,
1555        b: b_mvar,
1556        in_service: true,
1557        control: None,
1558        uid: None,
1559        extras,
1560    })
1561}
1562
1563// ---- Branch table ------------------------------------------------------------
1564
/// Decide whether a branch record flag word is one this reader decodes.
///
/// The word is a Delphi field presence bitmask like the bus record's. The
/// base bits `0x4C` must all be set, bits 0, 1, 4, 5, 7, 10, and 14 may
/// take any value, and every other bit must be clear.
///
/// * Bit 0 set omits the circuit ID string and its status byte (the
///   PowerWorld default " 1" applies).
/// * Bit 1 set means two inline rating slots instead of three (the
///   Simulator 19 era writer inlines three).
/// * Bit 4 marks a count prefixed list in the record tail.
/// * Bit 7 is set on every 425/508 era record; the 2021 era Texas7k
///   exports clear it on most lines while setting it on every transformer
///   and a few dozen lines, with the head layout through the kind byte
///   identical either way (its field lives in the undecoded tail).
/// * Bits 10 and 14 are set on some words in the Kundur save.
///
/// Admitting the bit 7 clear words doubles the flag vocabulary; the
/// measured cost on the 425 era corpus is a few microseconds
/// (benchmarks/RESULTS.md), and a mask keyed to the generator layout was
/// tried and rejected anyway, since the strict mask turns real bit 7 clear
/// records invisible to the table end check and a forged short table can
/// win.
///
/// Observed words: 0xEC/0xFC (2016), 0xEE/0xEF (2018 and v19), 0xFE/0xFF
/// (v19), 0x6C and 0xEC/0xED (Texas7k), 0xCE on the Australian series
/// capacitor records, and the same families with bits 10 or 14 set in the
/// Kundur save. Other combinations of the same bits are admitted by the bit
/// logic and guarded by the structural anchors in [`read_branch_head`].
fn known_branch_flags(flags: u16) -> bool {
    /// Bits every accepted word carries.
    const REQUIRED: u16 = 0x004C;
    /// Bits that may be set or clear: 0, 1, 4, 5, 7, 10, and 14.
    const OPTIONAL: u16 = 0x0001 | 0x0002 | 0x0010 | 0x0020 | 0x0080 | 0x0400 | 0x4000;

    let fixed_bits = flags & !OPTIONAL;
    fixed_bits == REQUIRED
}
1589
1590/// Locate and walk the branch table after `from`: the first `(count, glue)`
1591/// candidate whose walk succeeds and after which no further branch record
1592/// follows (a forged count word inside the glue can parse a prefix of the
1593/// real table; the true count lands where no further record follows).
1594fn find_branch_table(
1595    b: &[u8],
1596    from: usize,
1597    bus_ids: &BusIdSet,
1598    bus_names: &HashMap<String, BusId>,
1599    runs: &RefCell<HashMap<usize, Run<(Branch, u16)>>>,
1600    count_can_include_trailer: bool,
1601    budget: &SearchBudget,
1602) -> Option<Vec<Branch>> {
1603    // The gap between the shunt table end and the branch count word can
1604    // exceed one resync window; two cover every observed file.
1605    let limit = from
1606        .saturating_add(RESYNC_WINDOW * 2)
1607        .min(b.len().saturating_sub(4));
1608    for at in from..limit {
1609        let count = u32::from_le_bytes(b[at..at + 4].try_into().unwrap()) as usize;
1610        if count == 0 || count > 10_000_000 {
1611            continue;
1612        }
1613        // The branch table glue is longer than the device tables'; scan a
1614        // window after the count for the first record.
1615        let Some(first) = (at.saturating_add(4)..at.saturating_add(64).min(b.len()))
1616            .take_while(|_| budget.tick())
1617            .find(|&p| read_branch_head(b, p, bus_ids, bus_names).is_ok())
1618        else {
1619            continue;
1620        };
1621        let counts = [
1622            Some(count),
1623            (count_can_include_trailer && count > 1).then_some(count - 1),
1624        ];
1625        for effective_count in counts.into_iter().flatten() {
1626            if !count_fits(b, at.saturating_add(4), effective_count, 24) {
1627                continue;
1628            }
1629            if let Some((branches, after)) =
1630                branch_run(b, runs, first, effective_count, bus_ids, bus_names, budget)
1631            {
1632                // The end check must step exactly like the run: a bit 4 tail on
1633                // the last record can hold more than one window of blob, and a
1634                // forged short count ending on such a record would otherwise
1635                // read as "no further record" and win.
1636                let last_bit4 = branches.last().is_some_and(|(_, flags)| flags & 0x10 != 0);
1637                let continues = (after..resync_end(b, after, last_bit4))
1638                    .take_while(|_| budget.tick())
1639                    .any(|p| read_branch_head(b, p, bus_ids, bus_names).is_ok());
1640                // A scan cut short by the budget cannot vouch that the table
1641                // ends here; reject rather than accept a possibly forged count.
1642                if budget.exhausted() {
1643                    return None;
1644                }
1645                if !continues {
1646                    return Some(branches.into_iter().map(|(br, _)| br).collect());
1647                }
1648            }
1649        }
1650    }
1651    None
1652}
1653
1654/// The branch record run from `first`, extended to `count` records if the
1655/// bytes allow (see [`Run`]). The items keep their flag words: the scan
1656/// window for the next record depends on the preceding record's bit 4.
1657fn branch_run(
1658    b: &[u8],
1659    runs: &RefCell<HashMap<usize, Run<(Branch, u16)>>>,
1660    first: usize,
1661    count: usize,
1662    bus_ids: &BusIdSet,
1663    bus_names: &HashMap<String, BusId>,
1664    budget: &SearchBudget,
1665) -> Option<(Vec<(Branch, u16)>, usize)> {
1666    let mut map = runs.borrow_mut();
1667    let run = match map.entry(first) {
1668        Entry::Occupied(e) => e.into_mut(),
1669        // A failed head parse is not cached, as in the sibling run lookups.
1670        Entry::Vacant(e) => {
1671            let (br, end, flags) = read_branch_head(b, first, bus_ids, bus_names).ok()?;
1672            e.insert(Run::start((br, flags), end))
1673        }
1674    };
1675    run.prefix(count, |after, prev| {
1676        // The undecoded record tail separates this record from the next;
1677        // find the next head by bounded scan (see resync_end).
1678        (after..resync_end(b, after, prev.1 & 0x10 != 0))
1679            .take_while(|_| budget.tick())
1680            .find_map(|p| {
1681                read_branch_head(b, p, bus_ids, bus_names)
1682                    .ok()
1683                    .map(|(br, end, flags)| ((br, flags), end))
1684            })
1685    })
1686}
1687
1688/// Branch record, validated field by field against the aux siblings of all
1689/// three cases (6,491 records). After the impedances: two or three inline
1690/// per unit rating slots (by flag bit 1), a constant u32 tag, eleven f32
1691/// slots (zero in every available case), one zero byte, then the kind byte
1692/// that separates lines from transformers (which carry their tap next). The
1693/// tag and the zero byte are structural anchors: an unobserved variant
1694/// shifts them and dies loudly instead of misreading.
1695#[allow(clippy::many_single_char_names)] // r, x, b are the domain names
1696fn read_branch_head(
1697    b: &[u8],
1698    at: usize,
1699    bus_ids: &BusIdSet,
1700    bus_names: &HashMap<String, BusId>,
1701) -> Probe<(Branch, usize, u16)> {
1702    read_step_up_transformer_head(b, at, bus_ids, bus_names)
1703        .or_else(|_| read_standard_branch_head(b, at, bus_ids))
1704}
1705
1706#[allow(clippy::many_single_char_names)] // r, x, b are the domain names
1707fn read_standard_branch_head(
1708    b: &[u8],
1709    at: usize,
1710    bus_ids: &BusIdSet,
1711) -> Probe<(Branch, usize, u16)> {
1712    let mut c = Cur { b, pos: at };
1713    let from = branch_endpoint(&mut c)?;
1714    let to = branch_endpoint(&mut c)?;
1715    if !bus_ids.contains(from) || !bus_ids.contains(to) || from == to {
1716        return Err("branch references unknown buses");
1717    }
1718    let flags = c.u16()?;
1719    if !known_branch_flags(flags) {
1720        return Err("branch record flags not in the validated set");
1721    }
1722    let circuit = if flags & 1 == 0 {
1723        Some(c.short_string_2()?)
1724    } else {
1725        // Omitted circuit: PowerWorld's default, observed as " 1" in the
1726        // sibling aux.
1727        None
1728    };
1729    let r = c.f32()?;
1730    let x = c.f32()?;
1731    let b_chg = c.f32()?;
1732    for v in [r, x, b_chg] {
1733        if !v.is_finite() || v.abs() > 1.0e4 {
1734            return Err("implausible branch impedance");
1735        }
1736    }
1737    let _g = c.f32()?;
1738    let inline = if flags & 2 == 0 { 3 } else { 2 };
1739    let mut rates = [0.0f64; 3];
1740    for slot in rates.iter_mut().take(inline) {
1741        let v = c.f32()?;
1742        if !v.is_finite() || !(0.0..=1.0e6).contains(&v) {
1743            return Err("implausible branch rating");
1744        }
1745        *slot = v * MVA_BASE;
1746    }
1747    let tail_start = c.pos;
1748    let tag = c.u32()?;
1749    let (device, tap) = match tag {
1750        12 => read_modern_branch_tail(&mut c)?,
1751        5 => read_legacy_branch_tail(&mut c, tail_start)?,
1752        _ => return Err("branch tail tag not in the validated set"),
1753    };
1754    let mut extras = Extras::new();
1755    extras.insert(
1756        LINE_CIRCUIT.into(),
1757        serde_json::Value::String(circuit.map_or_else(
1758            || " 1".to_string(),
1759            |s| String::from_utf8_lossy(s).into_owned(),
1760        )),
1761    );
1762    extras.insert(
1763        BRANCH_DEVICE_TYPE.into(),
1764        serde_json::Value::String(device.into()),
1765    );
1766    let br = Branch {
1767        from: BusId(from),
1768        to: BusId(to),
1769        r,
1770        x,
1771        b: b_chg,
1772        charging: None,
1773        rate_a: rates[0],
1774        rate_b: rates[1],
1775        rate_c: rates[2],
1776        rating_sets: Vec::new(),
1777        current_ratings: None,
1778        tap,
1779        // Phase shift is undecoded: every available case has zero phase, so
1780        // the field's location is unknown (see the module docs).
1781        shift: 0.0,
1782        // The branch status byte is unlocated (the byte once assumed to be
1783        // it was the circuit ID's unused capacity byte); every available
1784        // record is Closed. See the module docs.
1785        in_service: true,
1786        angmin: -360.0,
1787        angmax: 360.0,
1788        control: None,
1789        solution: None,
1790        uid: None,
1791        extras,
1792    };
1793    Ok((br, c.pos, flags))
1794}
1795
1796/// Read a signed branch endpoint. Some saves store a negative endpoint for
1797/// orientation metadata; the network bus id is the positive magnitude.
1798fn branch_endpoint(c: &mut Cur<'_>) -> Probe<usize> {
1799    let raw = i32::from_le_bytes(c.take(4)?.try_into().unwrap());
1800    raw.checked_abs()
1801        .and_then(|id| (id > 0).then_some(id as usize))
1802        .ok_or("invalid branch endpoint")
1803}
1804
1805/// Probe the fixed-layout generator step-up transformer records found in the
1806/// Australian cases. They do not use the normal branch head, so this reader
1807/// keeps them behind a separate probe with several anchors: known high side
1808/// bus, 100 MVA nominal marker, plausible device X/MBASE fields, "STEP UP" in
1809/// the name, and a low side bus named `GEN <unit>`.
1810fn read_step_up_transformer_head(
1811    b: &[u8],
1812    at: usize,
1813    bus_ids: &BusIdSet,
1814    bus_names: &HashMap<String, BusId>,
1815) -> Probe<(Branch, usize, u16)> {
1816    if at < 8 {
1817        return Err("step up transformer anchor before record");
1818    }
1819    let from = u32_at(b, at)? as usize;
1820    if !bus_ids.contains(from) {
1821        return Err("step up transformer high side bus is unknown");
1822    }
1823    let nominal = f32_at(b, at - 8)?;
1824    if !nominal.is_finite() || (nominal - 100.0).abs() > 1e-3 {
1825        return Err("step up transformer nominal anchor missing");
1826    }
1827    let x_device = f32_at(b, checked_offset(at, 17)?)?;
1828    let mbase = f32_at(b, checked_offset(at, 197)?)?;
1829    if !x_device.is_finite()
1830        || !(0.0..=100.0).contains(&x_device)
1831        || !mbase.is_finite()
1832        || !(0.1..=1.0e5).contains(&mbase)
1833    {
1834        return Err("step up transformer impedance anchor missing");
1835    }
1836    let name_at = checked_offset(at, 356)?;
1837    let name = string_at(b, name_at, 64)?;
1838    let Some(gen_name) = name.split_whitespace().next() else {
1839        return Err("step up transformer name is empty");
1840    };
1841    if !name.to_ascii_uppercase().contains("STEP UP") {
1842        return Err("step up transformer name anchor missing");
1843    }
1844    let to = bus_names
1845        .get(&format!("GEN {}", gen_name.to_ascii_uppercase()))
1846        .copied()
1847        .ok_or("step up transformer low side bus is unknown")?;
1848    if to.0 == from {
1849        return Err("step up transformer has identical endpoints");
1850    }
1851    let mut extras = Extras::new();
1852    extras.insert(LINE_CIRCUIT.into(), serde_json::Value::String(" 1".into()));
1853    extras.insert(
1854        BRANCH_DEVICE_TYPE.into(),
1855        serde_json::Value::String("Transformer".into()),
1856    );
1857    let br = Branch {
1858        from: BusId(from),
1859        to,
1860        r: 0.0,
1861        x: x_device * mbase / MVA_BASE,
1862        b: 0.0,
1863        charging: None,
1864        rate_a: 0.0,
1865        rate_b: 0.0,
1866        rate_c: 0.0,
1867        rating_sets: Vec::new(),
1868        current_ratings: None,
1869        tap: 1.0,
1870        shift: 0.0,
1871        in_service: true,
1872        angmin: -360.0,
1873        angmax: 360.0,
1874        control: None,
1875        solution: None,
1876        uid: None,
1877        extras,
1878    };
1879    Ok((
1880        br,
1881        checked_offset(checked_offset(name_at, 4)?, name.len())?,
1882        0,
1883    ))
1884}
1885
1886/// Read the modern branch tail after the common electrical head. The tail is
1887/// a rating block, separator byte, and kind marker; kind 1 is a line, kind 0
1888/// is a transformer followed by the tap.
1889fn read_modern_branch_tail(c: &mut Cur<'_>) -> Probe<(&'static str, f64)> {
1890    for _ in 0..11 {
1891        let v = c.f32()?;
1892        if !v.is_finite() || v.abs() > 1.0e6 {
1893            return Err("implausible branch rating block value");
1894        }
1895    }
1896    if c.u8()? != 0 {
1897        return Err("branch record separator byte not zero");
1898    }
1899    match c.u8()? {
1900        0x01 => Ok(("Line", 0.0)),
1901        0x00 => {
1902            let tap = c.f32()?;
1903            if !tap.is_finite() || !(0.2..=5.0).contains(&tap) {
1904                return Err("implausible transformer tap");
1905            }
1906            Ok(("Transformer", tap))
1907        }
1908        _ => Err("branch kind marker not in the validated set"),
1909    }
1910}
1911
1912/// Read the older short branch tail. These saves do not carry the modern kind
1913/// marker, so transformer detection uses the validated zero marker block plus
1914/// a plausible non-unit tap at the observed tail offset.
1915fn read_legacy_branch_tail(c: &mut Cur<'_>, tail_start: usize) -> Probe<(&'static str, f64)> {
1916    for _ in 0..4 {
1917        if c.u32()? != 0 {
1918            return Err("legacy branch tail marker is not zero filled");
1919        }
1920    }
1921    let tap = tail_start
1922        .checked_add(22)
1923        .and_then(|at| slice_at(c.b, at, 4))
1924        .and_then(|s| <[u8; 4]>::try_from(s).ok())
1925        .map_or(0.0, |raw| f64::from(f32::from_le_bytes(raw)));
1926    if tap.is_finite() && (0.2..=5.0).contains(&tap) && (tap - 1.0).abs() > 1e-6 {
1927        Ok(("Transformer", tap))
1928    } else {
1929        Ok(("Line", 0.0))
1930    }
1931}
1932
#[cfg(test)]
mod tests {
    use super::*;

    /// A network called `name` whose tables are all empty.
    fn empty_network(name: &str) -> Network {
        Network {
            name: name.to_owned(),
            base_mva: MVA_BASE,
            base_frequency: crate::network::DEFAULT_BASE_FREQUENCY,
            buses: Vec::new(),
            loads: Vec::new(),
            shunts: Vec::new(),
            branches: Vec::new(),
            switches: Vec::new(),
            generators: Vec::new(),
            storage: Vec::new(),
            hvdc: Vec::new(),
            transformers_3w: Vec::new(),
            areas: Vec::new(),
            solver: None,
            source_format: SourceFormat::PowerWorldBinary,
            source: None,
        }
    }

    /// An error offered with a high score must not beat a valid chain
    /// offered afterwards with a low score.
    #[test]
    fn best_chain_prefers_valid_chain_over_higher_scoring_error() {
        let mut best = None;
        keep_best_chain(&mut best, 100, Err(unsupported_vintage("bad candidate")));
        keep_best_chain(&mut best, 1, Ok(empty_network("valid")));

        let (_, kept) = best.expect("one candidate must have been kept");
        assert!(kept.is_ok());
    }

    /// Builds a 41 byte buffer with f32 values at offsets 25, 29, 33, and
    /// 37, then checks the decoded load and the final cursor position.
    #[test]
    fn alternate_load_record_reads_late_p_and_q() {
        let mut bytes = vec![0u8; 41];
        bytes[6] = 0;
        for (offset, value) in [(25usize, 1.0f32), (29, 0.0), (33, 0.5), (37, 0.25)] {
            bytes[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
        }

        let mut cursor = Cur { b: &bytes, pos: 6 };
        let load = read_load(&mut cursor, BusId(1), b"1").unwrap();

        assert!((load.p - 50.0).abs() < 1e-9);
        assert!((load.q - 25.0).abs() < 1e-9);
        assert_eq!(cursor.pos, 41);
    }
}