Skip to main content

powerio/format/powerworld/
pwd.rs

1//! Read substation coordinates from PowerWorld `.pwd` display files
2//! (read only).
3//!
4//! A `.pwd` is the diagram sibling of a case: drawing records for buses,
5//! branches, substations, and field labels. This reader decodes the one
6//! subset with a differential oracle, the substation symbols, and leaves
7//! every other drawing object undecoded. Files without the substation table
8//! still return display metadata with an empty substation list. The evidence
9//! (seven files across
10//! the 2016 through 2022 writer eras, each matched 1-1 against the
11//! latitude/longitude its same vintage aux carries per substation, except
12//! the v19 resave, which matches 1248/1250 against the published case
13//! across a vintage skew) is in
14//! `powerio/src/format/powerworld/FORMAT.md`.
15//!
16//! Two structures carry the data, both present in every probed save:
17//!
18//! - The substation identity table, behind the only `ff ff ff ff 3d 0f`
19//!   byte sequence in the file (sentinel plus table tag 0x0f3d): records of
20//!   `u32 number, u32 number (exact duplicate), u32 length, name, 0x02`,
21//!   terminated exactly by the next `ff ff ff ff`. The order is display
22//!   order, not case order.
23//! - The DisplaySubstation drawing records: each repeats the file's header
24//!   stamp (the u32 at offset 22) at +18, stores the position as f64 x/y at
25//!   +22/+30 with an f32 echo of both at +2/+6, and links its substation
26//!   number behind a marker byte (0x03 or 0x07 by writer era) in the style
27//!   tail. The record's type tag (the u16 at +0) varies per save, so the
28//!   reader keys on this structure instead: stamp echo, dual encoded
29//!   coordinates, and a link to every identity row in table order. Decoy
30//!   groups exist (field label records with the same count and plausible
31//!   coordinates) and fail the link gauntlet; if more than one group ever
32//!   passes, the reader rejects rather than guesses.
33//!
34//! The coordinates are diagram positions (y north positive), not
35//! geography: no probed file stores latitude or longitude directly. The
36//! auto generated TAMU and Hawaii layouts equal a Mercator projection
37//! (`x = k * longitude`, `y = k * merc(latitude)`, k = 535.81608... on the
38//! never edited Hawaii40 file, bit exact), but hand moved symbols and the
39//! June 2016 era deviate, so the values are exposed as stored and any
40//! projection is the consumer's choice. Consumers wanting geography should
41//! read the aux.
42
43use std::collections::HashSet;
44use std::path::Path;
45
46use crate::{Error, Result};
47
/// Format label attached to every [`Error::FormatRead`] built by this reader.
const FMT: &str = "PowerWorld .pwd";

/// Anchor of the substation identity table: the `ff ff ff ff` sentinel
/// immediately followed by the table tag bytes `3d 0f`.
const IDENTITY_TAG: [u8; 6] = *b"\xff\xff\xff\xff\x3d\x0f";
52
/// A single substation symbol read from a display file.
///
/// Each value pairs one identity table row (`number`, `name`) with the
/// drawing record linked to it, and values are produced in identity table
/// (display) order. `x` and `y` are the diagram coordinates exactly as the
/// file stores them, with y increasing to the north (see the module docs).
#[derive(Debug, Clone, PartialEq)]
pub struct PwdSubstation {
    /// Substation number from the identity table.
    pub number: u32,
    /// Substation name from the identity table.
    pub name: String,
    /// Stored diagram x coordinate.
    pub x: f64,
    /// Stored diagram y coordinate (north positive).
    pub y: f64,
}
63
64/// Decoded PowerWorld display file content.
65///
66/// A `.pwd` is not a case file and does not carry a [`Network`](crate::Network).
67/// This structure exposes the display metadata the reader validates plus the
68/// supported drawing object subset.
69#[derive(Debug, Clone, PartialEq)]
70pub struct PwdDisplay {
71    pub canvas_width: u16,
72    pub canvas_height: u16,
73    pub stamp: u32,
74    pub substations: Vec<PwdSubstation>,
75}
76
77/// Read and parse a `.pwd` display file.
78///
79/// # Errors
80/// [`Error::Io`] when the file cannot be read, or [`Error::FormatRead`] when
81/// the display bytes are not a supported PowerWorld `.pwd` shape.
82pub fn parse_pwd_file(path: impl AsRef<Path>) -> Result<PwdDisplay> {
83    let bytes = std::fs::read(path)?;
84    parse_pwd_display(&bytes)
85}
86
87/// Parse a `.pwd` display file, returning metadata and decoded substations.
88///
89/// # Errors
90/// [`Error::FormatRead`] when the header is not the known display shape,
91/// or no unique drawing record group links to the identity rows.
92pub fn parse_pwd_display(bytes: &[u8]) -> Result<PwdDisplay> {
93    parse_pwd_inner(bytes)
94}
95
96/// Parse the substation coordinates out of `.pwd` bytes.
97///
98/// # Errors
99/// [`Error::FormatRead`] when the header is not the known display shape,
100/// or no unique drawing record group links to the identity rows.
101pub fn parse_pwd(bytes: &[u8]) -> Result<Vec<PwdSubstation>> {
102    parse_pwd_display(bytes).map(|display| display.substations)
103}
104
105fn pwd_err(message: impl Into<String>) -> Error {
106    Error::FormatRead {
107        format: FMT,
108        message: message.into(),
109    }
110}
111
112fn parse_pwd_header(bytes: &[u8]) -> Result<(u16, u16, u32)> {
113    let (Some(header), Some(canvas_width), Some(canvas_height)) =
114        (u32_at(bytes, 0), u16_at(bytes, 4), u16_at(bytes, 6))
115    else {
116        let header = u32_at(bytes, 0).unwrap_or(0);
117        return Err(pwd_err(format!(
118            "not a recognized PowerWorld display file (header word {header}; the probed saves all \
119             carry 50)",
120        )));
121    };
122    if bytes.len() < 0x40 || header != 50 {
123        return Err(pwd_err(format!(
124            "not a recognized PowerWorld display file (header word {header}; the probed saves all \
125             carry 50)",
126        )));
127    }
128    if canvas_width == 0 || canvas_height == 0 {
129        return Err(pwd_err("display header canvas dimensions are zero"));
130    }
131    let stamp = u32_at(bytes, 22).unwrap_or(0);
132    if stamp == 0 {
133        return Err(pwd_err(
134            "display header stamp is zero; every validated save carries a nonzero stamp the \
135             drawing records repeat",
136        ));
137    }
138    Ok((canvas_width, canvas_height, stamp))
139}
140
141fn parse_pwd_inner(bytes: &[u8]) -> Result<PwdDisplay> {
142    let (canvas_width, canvas_height, stamp) = parse_pwd_header(bytes)?;
143
144    let identity = find_identity_table(bytes)?;
145    if identity.is_empty() {
146        return Ok(PwdDisplay {
147            canvas_width,
148            canvas_height,
149            stamp,
150            substations: Vec::new(),
151        });
152    }
153
154    // Every drawing object record repeats the header stamp at +18 and dual
155    // encodes its position (f64 at +22/+30, f32 echo at +2/+6); the scan
156    // collects every offset with that shape and groups by the u16 type tag.
157    let mut groups: Vec<(u16, Vec<DrawRecord>)> = Vec::new();
158    for i in 0..bytes.len().saturating_sub(38) {
159        if u32_at(bytes, i + 18) != Some(stamp) {
160            continue;
161        }
162        let (Some(x), Some(y)) = (f64_at(bytes, i + 22), f64_at(bytes, i + 30)) else {
163            continue;
164        };
165        if !x.is_finite() || !y.is_finite() {
166            continue;
167        }
168        #[allow(clippy::cast_possible_truncation)] // the echo is the f32 rounding by design
169        let (rx, ry) = (x as f32, y as f32);
170        // Bit equality: the magnitude gate below excludes zero, so the only
171        // value the echo can hold is the rounded f64 itself.
172        if f32_at(bytes, i + 2).map(f32::to_bits) != Some(rx.to_bits())
173            || f32_at(bytes, i + 6).map(f32::to_bits) != Some(ry.to_bits())
174        {
175            continue;
176        }
177        let magnitude = x.abs().max(y.abs());
178        if !(1.0..1.0e7).contains(&magnitude) {
179            continue;
180        }
181        let Some(tag) = u16_at(bytes, i) else {
182            continue;
183        };
184        let rec = DrawRecord { at: i, x, y };
185        match groups.iter_mut().find(|(t, _)| *t == tag) {
186            Some((_, v)) => v.push(rec),
187            None => groups.push((tag, vec![rec])),
188        }
189    }
190
191    // The substation group is the one whose records, in stream order, link
192    // every identity row in table order: a marker byte (0x03 or 0x07 by
193    // era) followed by the row's u32 number, somewhere in the style tail.
194    // Field label decoys carry other markers (0x05 observed) or another
195    // order and fail; ambiguity is a loud error, never a pick.
196    let matches: Vec<&(u16, Vec<DrawRecord>)> = groups
197        .iter()
198        .filter(|(_, records)| {
199            records.len() == identity.len()
200                && records
201                    .iter()
202                    .zip(&identity)
203                    .all(|(rec, (number, _))| links_number(bytes, rec.at, *number))
204        })
205        .collect();
206    let (_, records) = match matches.as_slice() {
207        [one] => *one,
208        [] => {
209            return Err(pwd_err(format!(
210                "no drawing record group links the {} substation identity rows; the \
211                 DisplaySubstation layout of this save is not the validated one",
212                identity.len()
213            )));
214        }
215        several => {
216            return Err(pwd_err(format!(
217                "{} drawing record groups link the substation identity rows; refusing to guess \
218                 between them",
219                several.len()
220            )));
221        }
222    };
223
224    let substations = records
225        .iter()
226        .zip(identity)
227        .map(|(rec, (number, name))| PwdSubstation {
228            number,
229            name,
230            x: rec.x,
231            y: rec.y,
232        })
233        .collect();
234    Ok(PwdDisplay {
235        canvas_width,
236        canvas_height,
237        stamp,
238        substations,
239    })
240}
241
/// One candidate drawing record that survived the shape gate.
///
/// The stream offset is kept for the identity link check, and the decoded
/// coordinates are kept so the final join does not have to read the bytes a
/// second time.
struct DrawRecord {
    /// Byte offset of the record's first byte (its u16 type tag).
    at: usize,
    /// Decoded f64 x position.
    x: f64,
    /// Decoded f64 y position.
    y: f64,
}
250
251/// The substation identity table: exactly one valid walk behind a
252/// `ff ff ff ff 3d 0f` anchor. A missing table means there are no decoded
253/// substation symbols. Several tables are a loud error.
254fn find_identity_table(b: &[u8]) -> Result<Vec<(u32, String)>> {
255    let mut tables = Vec::new();
256    for at in memmem(b, &IDENTITY_TAG) {
257        if let Some(rows) = identity_walk(b, at + IDENTITY_TAG.len()) {
258            tables.push(rows);
259        }
260    }
261    match tables.len() {
262        1 => Ok(tables.pop().unwrap()),
263        0 => Ok(Vec::new()),
264        n => Err(Error::FormatRead {
265            format: FMT,
266            message: format!(
267                "{n} byte ranges walk as a substation identity table; refusing to guess \
268                 between them"
269            ),
270        }),
271    }
272}
273
274/// Walk identity records (`u32 number, u32 duplicate, u32 length, name,
275/// 0x02`) from `at` until the next `ff ff ff ff` sentinel, which must
276/// arrive exactly at a record boundary. At least one record, numbers
277/// unique and plausible, names printable.
278fn identity_walk(b: &[u8], mut at: usize) -> Option<Vec<(u32, String)>> {
279    let mut rows = Vec::new();
280    let mut seen = HashSet::new();
281    loop {
282        if b.get(at..).and_then(|s| s.get(..4)) == Some([0xff; 4].as_slice()) {
283            return (!rows.is_empty()).then_some(rows);
284        }
285        let number = u32_at(b, at)?;
286        let duplicate_at = at.checked_add(4)?;
287        if number == 0 || number > 99_999_999 || u32_at(b, duplicate_at) != Some(number) {
288            return None;
289        }
290        let len_at = at.checked_add(8)?;
291        let len = u32_at(b, len_at)? as usize;
292        if len == 0 || len >= 64 {
293            return None;
294        }
295        let name_start = at.checked_add(12)?;
296        let name_end = name_start.checked_add(len)?;
297        let name = b.get(name_start..name_end)?;
298        if !name.iter().all(|&c| (0x20..0x7f).contains(&c)) || b.get(name_end) != Some(&0x02) {
299            return None;
300        }
301        if !seen.insert(number) {
302            return None;
303        }
304        rows.push((number, String::from_utf8_lossy(name).into_owned()));
305        at = name_end.checked_add(1)?;
306    }
307}
308
/// Report whether the drawing record at offset `i` links substation `number`.
///
/// A link is a marker byte `0x03` or `0x07` directly followed by the little
/// endian u32 `number`. The marker is searched at every offset from `i + 40`
/// up to and including `i + 139`; the window is wide because the position of
/// the link inside the style tail varies between saves.
fn links_number(b: &[u8], i: usize, number: u32) -> bool {
    let wanted = number.to_le_bytes();
    let Some(tail) = i.checked_add(40).and_then(|start| b.get(start..)) else {
        return false;
    };
    // Each window is one marker byte plus the four number bytes; the first
    // hundred windows cover marker offsets +40 through +139.
    tail.windows(5)
        .take(100)
        .any(|window| matches!(window, [0x03 | 0x07, link @ ..] if *link == wanted))
}
325
/// Every start offset of `needle` in `haystack`, in ascending order,
/// overlapping occurrences included.
///
/// Total for every input: a needle longer than the haystack yields nothing,
/// and an empty needle matches at every offset from 0 through
/// `haystack.len()` instead of panicking.
fn memmem<'a>(haystack: &'a [u8], needle: &'a [u8]) -> impl Iterator<Item = usize> + 'a {
    // `None` when the needle cannot fit, which makes the iterator empty.
    let last_start = haystack.len().checked_sub(needle.len());
    last_start
        .into_iter()
        .flat_map(|last| 0..=last)
        .filter(move |&start| {
            haystack
                .get(start..)
                .is_some_and(|tail| tail.starts_with(needle))
        })
}
333
334// Total little endian reads: `None` past the end of the buffer, no index
335// arithmetic that can panic or wrap. Every offset in this reader derives
336// from untrusted file bytes, so the accessors carry the bounds check.
337
/// Little endian `u16` at byte offset `i`, or `None` when fewer than two
/// bytes remain from `i`.
fn u16_at(b: &[u8], i: usize) -> Option<u16> {
    b.get(i..)?
        .first_chunk::<2>()
        .copied()
        .map(u16::from_le_bytes)
}
341
/// Little endian `u32` at byte offset `i`, or `None` when fewer than four
/// bytes remain from `i`.
fn u32_at(b: &[u8], i: usize) -> Option<u32> {
    b.get(i..)?
        .first_chunk::<4>()
        .copied()
        .map(u32::from_le_bytes)
}
345
/// Little endian `f32` at byte offset `i`, or `None` when fewer than four
/// bytes remain from `i`.
fn f32_at(b: &[u8], i: usize) -> Option<f32> {
    b.get(i..)?
        .first_chunk::<4>()
        .copied()
        .map(f32::from_le_bytes)
}
349
/// Little endian `f64` at byte offset `i`, or `None` when fewer than eight
/// bytes remain from `i`.
fn f64_at(b: &[u8], i: usize) -> Option<f64> {
    b.get(i..)?
        .first_chunk::<8>()
        .copied()
        .map(f64::from_le_bytes)
}