powerio/format/powerworld/pwd.rs
1//! Read substation coordinates from PowerWorld `.pwd` display files
2//! (read only).
3//!
4//! A `.pwd` is the diagram sibling of a case: drawing records for buses,
5//! branches, substations, and field labels. This reader decodes the one
6//! subset with a differential oracle, the substation symbols, and leaves
7//! every other drawing object undecoded. Files without the substation table
8//! still return display metadata with an empty substation list. The evidence
9//! (seven files across
10//! the 2016 through 2022 writer eras, each matched 1-1 against the
11//! latitude/longitude its same vintage aux carries per substation, except
12//! the v19 resave, which matches 1248/1250 against the published case
13//! across a vintage skew) is in
14//! `powerio/src/format/powerworld/FORMAT.md`.
15//!
16//! Two structures carry the data, both present in every probed save:
17//!
18//! - The substation identity table, behind the only `ff ff ff ff 3d 0f`
19//! byte sequence in the file (sentinel plus table tag 0x0f3d): records of
20//! `u32 number, u32 number (exact duplicate), u32 length, name, 0x02`,
21//! terminated exactly by the next `ff ff ff ff`. The order is display
22//! order, not case order.
23//! - The DisplaySubstation drawing records: each repeats the file's header
24//! stamp (the u32 at offset 22) at +18, stores the position as f64 x/y at
25//! +22/+30 with an f32 echo of both at +2/+6, and links its substation
26//! number behind a marker byte (0x03 or 0x07 by writer era) in the style
27//! tail. The record's type tag (the u16 at +0) varies per save, so the
28//! reader keys on this structure instead: stamp echo, dual encoded
29//! coordinates, and a link to every identity row in table order. Decoy
30//! groups exist (field label records with the same count and plausible
31//! coordinates) and fail the link gauntlet; if more than one group ever
32//! passes, the reader rejects rather than guesses.
33//!
34//! The coordinates are diagram positions (y north positive), not
35//! geography: no probed file stores latitude or longitude directly. The
36//! auto generated TAMU and Hawaii layouts equal a Mercator projection
37//! (`x = k * longitude`, `y = k * merc(latitude)`, k = 535.81608... on the
38//! never edited Hawaii40 file, bit exact), but hand moved symbols and the
39//! June 2016 era deviate, so the values are exposed as stored and any
40//! projection is the consumer's choice. Consumers wanting geography should
41//! read the aux.
42
43use std::collections::HashSet;
44use std::path::Path;
45
46use crate::{Error, Result};
47
/// Format label carried by the [`Error::FormatRead`] values this reader builds.
const FMT: &str = "PowerWorld .pwd";

/// Anchor of the substation identity table: the `ff ff ff ff` sentinel
/// followed by the table tag 0x0f3d in little endian byte order.
const IDENTITY_TAG: [u8; 6] = *b"\xff\xff\xff\xff\x3d\x0f";
52
/// A substation symbol decoded from a display file.
///
/// Each value joins one identity table row (`number`, `name`) with the
/// drawing record that links it (`x`, `y`). Values are produced in identity
/// table order, which is display order rather than case order. The
/// coordinates are diagram positions exactly as stored, y north positive;
/// they are not latitude/longitude (see the module docs).
#[derive(Debug, Clone, PartialEq)]
pub struct PwdSubstation {
    /// Substation number from the identity table row.
    pub number: u32,
    /// Substation name from the identity table row (printable ASCII).
    pub name: String,
    /// Diagram x position, read from the f64 field of the drawing record.
    pub x: f64,
    /// Diagram y position, read from the f64 field of the drawing record.
    pub y: f64,
}
63
64/// Decoded PowerWorld display file content.
65///
66/// A `.pwd` is not a case file and does not carry a [`Network`](crate::Network).
67/// This structure exposes the display metadata the reader validates plus the
68/// supported drawing object subset.
69#[derive(Debug, Clone, PartialEq)]
70pub struct PwdDisplay {
71 pub canvas_width: u16,
72 pub canvas_height: u16,
73 pub stamp: u32,
74 pub substations: Vec<PwdSubstation>,
75}
76
77/// Read and parse a `.pwd` display file.
78///
79/// # Errors
80/// [`Error::Io`] when the file cannot be read, or [`Error::FormatRead`] when
81/// the display bytes are not a supported PowerWorld `.pwd` shape.
82pub fn parse_pwd_file(path: impl AsRef<Path>) -> Result<PwdDisplay> {
83 let bytes = std::fs::read(path)?;
84 parse_pwd_display(&bytes)
85}
86
87/// Parse a `.pwd` display file, returning metadata and decoded substations.
88///
89/// # Errors
90/// [`Error::FormatRead`] when the header is not the known display shape,
91/// or no unique drawing record group links to the identity rows.
92pub fn parse_pwd_display(bytes: &[u8]) -> Result<PwdDisplay> {
93 parse_pwd_inner(bytes)
94}
95
96/// Parse the substation coordinates out of `.pwd` bytes.
97///
98/// # Errors
99/// [`Error::FormatRead`] when the header is not the known display shape,
100/// or no unique drawing record group links to the identity rows.
101pub fn parse_pwd(bytes: &[u8]) -> Result<Vec<PwdSubstation>> {
102 parse_pwd_display(bytes).map(|display| display.substations)
103}
104
105fn pwd_err(message: impl Into<String>) -> Error {
106 Error::FormatRead {
107 format: FMT,
108 message: message.into(),
109 }
110}
111
112fn parse_pwd_header(bytes: &[u8]) -> Result<(u16, u16, u32)> {
113 let (Some(header), Some(canvas_width), Some(canvas_height)) =
114 (u32_at(bytes, 0), u16_at(bytes, 4), u16_at(bytes, 6))
115 else {
116 let header = u32_at(bytes, 0).unwrap_or(0);
117 return Err(pwd_err(format!(
118 "not a recognized PowerWorld display file (header word {header}; the probed saves all \
119 carry 50)",
120 )));
121 };
122 if bytes.len() < 0x40 || header != 50 {
123 return Err(pwd_err(format!(
124 "not a recognized PowerWorld display file (header word {header}; the probed saves all \
125 carry 50)",
126 )));
127 }
128 if canvas_width == 0 || canvas_height == 0 {
129 return Err(pwd_err("display header canvas dimensions are zero"));
130 }
131 let stamp = u32_at(bytes, 22).unwrap_or(0);
132 if stamp == 0 {
133 return Err(pwd_err(
134 "display header stamp is zero; every validated save carries a nonzero stamp the \
135 drawing records repeat",
136 ));
137 }
138 Ok((canvas_width, canvas_height, stamp))
139}
140
141fn parse_pwd_inner(bytes: &[u8]) -> Result<PwdDisplay> {
142 let (canvas_width, canvas_height, stamp) = parse_pwd_header(bytes)?;
143
144 let identity = find_identity_table(bytes)?;
145 if identity.is_empty() {
146 return Ok(PwdDisplay {
147 canvas_width,
148 canvas_height,
149 stamp,
150 substations: Vec::new(),
151 });
152 }
153
154 // Every drawing object record repeats the header stamp at +18 and dual
155 // encodes its position (f64 at +22/+30, f32 echo at +2/+6); the scan
156 // collects every offset with that shape and groups by the u16 type tag.
157 let mut groups: Vec<(u16, Vec<DrawRecord>)> = Vec::new();
158 for i in 0..bytes.len().saturating_sub(38) {
159 if u32_at(bytes, i + 18) != Some(stamp) {
160 continue;
161 }
162 let (Some(x), Some(y)) = (f64_at(bytes, i + 22), f64_at(bytes, i + 30)) else {
163 continue;
164 };
165 if !x.is_finite() || !y.is_finite() {
166 continue;
167 }
168 #[allow(clippy::cast_possible_truncation)] // the echo is the f32 rounding by design
169 let (rx, ry) = (x as f32, y as f32);
170 // Bit equality: the magnitude gate below excludes zero, so the only
171 // value the echo can hold is the rounded f64 itself.
172 if f32_at(bytes, i + 2).map(f32::to_bits) != Some(rx.to_bits())
173 || f32_at(bytes, i + 6).map(f32::to_bits) != Some(ry.to_bits())
174 {
175 continue;
176 }
177 let magnitude = x.abs().max(y.abs());
178 if !(1.0..1.0e7).contains(&magnitude) {
179 continue;
180 }
181 let Some(tag) = u16_at(bytes, i) else {
182 continue;
183 };
184 let rec = DrawRecord { at: i, x, y };
185 match groups.iter_mut().find(|(t, _)| *t == tag) {
186 Some((_, v)) => v.push(rec),
187 None => groups.push((tag, vec![rec])),
188 }
189 }
190
191 // The substation group is the one whose records, in stream order, link
192 // every identity row in table order: a marker byte (0x03 or 0x07 by
193 // era) followed by the row's u32 number, somewhere in the style tail.
194 // Field label decoys carry other markers (0x05 observed) or another
195 // order and fail; ambiguity is a loud error, never a pick.
196 let matches: Vec<&(u16, Vec<DrawRecord>)> = groups
197 .iter()
198 .filter(|(_, records)| {
199 records.len() == identity.len()
200 && records
201 .iter()
202 .zip(&identity)
203 .all(|(rec, (number, _))| links_number(bytes, rec.at, *number))
204 })
205 .collect();
206 let (_, records) = match matches.as_slice() {
207 [one] => *one,
208 [] => {
209 return Err(pwd_err(format!(
210 "no drawing record group links the {} substation identity rows; the \
211 DisplaySubstation layout of this save is not the validated one",
212 identity.len()
213 )));
214 }
215 several => {
216 return Err(pwd_err(format!(
217 "{} drawing record groups link the substation identity rows; refusing to guess \
218 between them",
219 several.len()
220 )));
221 }
222 };
223
224 let substations = records
225 .iter()
226 .zip(identity)
227 .map(|(rec, (number, name))| PwdSubstation {
228 number,
229 name,
230 x: rec.x,
231 y: rec.y,
232 })
233 .collect();
234 Ok(PwdDisplay {
235 canvas_width,
236 canvas_height,
237 stamp,
238 substations,
239 })
240}
241
/// A candidate drawing record that passed the shape gate.
///
/// The offset feeds the identity link check; the coordinates are kept so the
/// final join never has to re-read the bytes.
struct DrawRecord {
    /// Byte offset of the record's type tag within the file.
    at: usize,
    /// Diagram x position decoded from the record's f64 field.
    x: f64,
    /// Diagram y position decoded from the record's f64 field.
    y: f64,
}
250
251/// The substation identity table: exactly one valid walk behind a
252/// `ff ff ff ff 3d 0f` anchor. A missing table means there are no decoded
253/// substation symbols. Several tables are a loud error.
254fn find_identity_table(b: &[u8]) -> Result<Vec<(u32, String)>> {
255 let mut tables = Vec::new();
256 for at in memmem(b, &IDENTITY_TAG) {
257 if let Some(rows) = identity_walk(b, at + IDENTITY_TAG.len()) {
258 tables.push(rows);
259 }
260 }
261 match tables.len() {
262 1 => Ok(tables.pop().unwrap()),
263 0 => Ok(Vec::new()),
264 n => Err(Error::FormatRead {
265 format: FMT,
266 message: format!(
267 "{n} byte ranges walk as a substation identity table; refusing to guess \
268 between them"
269 ),
270 }),
271 }
272}
273
274/// Walk identity records (`u32 number, u32 duplicate, u32 length, name,
275/// 0x02`) from `at` until the next `ff ff ff ff` sentinel, which must
276/// arrive exactly at a record boundary. At least one record, numbers
277/// unique and plausible, names printable.
278fn identity_walk(b: &[u8], mut at: usize) -> Option<Vec<(u32, String)>> {
279 let mut rows = Vec::new();
280 let mut seen = HashSet::new();
281 loop {
282 if b.get(at..).and_then(|s| s.get(..4)) == Some([0xff; 4].as_slice()) {
283 return (!rows.is_empty()).then_some(rows);
284 }
285 let number = u32_at(b, at)?;
286 let duplicate_at = at.checked_add(4)?;
287 if number == 0 || number > 99_999_999 || u32_at(b, duplicate_at) != Some(number) {
288 return None;
289 }
290 let len_at = at.checked_add(8)?;
291 let len = u32_at(b, len_at)? as usize;
292 if len == 0 || len >= 64 {
293 return None;
294 }
295 let name_start = at.checked_add(12)?;
296 let name_end = name_start.checked_add(len)?;
297 let name = b.get(name_start..name_end)?;
298 if !name.iter().all(|&c| (0x20..0x7f).contains(&c)) || b.get(name_end) != Some(&0x02) {
299 return None;
300 }
301 if !seen.insert(number) {
302 return None;
303 }
304 rows.push((number, String::from_utf8_lossy(name).into_owned()));
305 at = name_end.checked_add(1)?;
306 }
307}
308
309/// Whether the drawing record at `i` links `number`: a marker byte 0x03 or
310/// 0x07 (the substation symbol markers of the two observed eras) directly
311/// followed by the number, inside the style tail window. The window is
312/// variable because a digit string of 1 to 4 characters precedes the link
313/// in some saves.
314fn links_number(b: &[u8], i: usize, number: u32) -> bool {
315 (40..140).any(|d| {
316 let Some(marker_at) = i.checked_add(d) else {
317 return false;
318 };
319 let Some(number_at) = marker_at.checked_add(1) else {
320 return false;
321 };
322 matches!(b.get(marker_at), Some(0x03 | 0x07)) && u32_at(b, number_at) == Some(number)
323 })
324}
325
/// Every start offset of `needle` in `haystack`, in ascending order,
/// overlapping occurrences included.
///
/// An empty needle occurs at every offset from 0 through `haystack.len()`.
/// A needle longer than the haystack yields nothing.
fn memmem<'a>(haystack: &'a [u8], needle: &'a [u8]) -> impl Iterator<Item = usize> + 'a {
    // `slice::windows` panics on a zero width, so candidate starts are
    // enumerated directly. `start` never exceeds `haystack.len()`, which
    // keeps the slice below in bounds.
    let last_start = haystack.len().saturating_sub(needle.len());
    (0..=last_start).filter(move |&start| haystack[start..].starts_with(needle))
}
333
334// Total little endian reads: `None` past the end of the buffer, no index
335// arithmetic that can panic or wrap. Every offset in this reader derives
336// from untrusted file bytes, so the accessors carry the bounds check.
337
/// Little endian `u16` at byte offset `i`, or `None` when fewer than two
/// bytes remain from `i`.
fn u16_at(b: &[u8], i: usize) -> Option<u16> {
    let tail = b.get(i..)?;
    tail.first_chunk::<2>().map(|raw| u16::from_le_bytes(*raw))
}
341
/// Little endian `u32` at byte offset `i`, or `None` when fewer than four
/// bytes remain from `i`.
fn u32_at(b: &[u8], i: usize) -> Option<u32> {
    let tail = b.get(i..)?;
    tail.first_chunk::<4>().map(|raw| u32::from_le_bytes(*raw))
}
345
/// Little endian `f32` at byte offset `i`, or `None` when fewer than four
/// bytes remain from `i`.
fn f32_at(b: &[u8], i: usize) -> Option<f32> {
    let tail = b.get(i..)?;
    tail.first_chunk::<4>().map(|raw| f32::from_le_bytes(*raw))
}
349
/// Little endian `f64` at byte offset `i`, or `None` when fewer than eight
/// bytes remain from `i`.
fn f64_at(b: &[u8], i: usize) -> Option<f64> {
    let tail = b.get(i..)?;
    tail.first_chunk::<8>().map(|raw| f64::from_le_bytes(*raw))
}