base/charset.rs
1//! Character set conversions.
2//!
//! Unicode to and from Lincoln Writer characters. No support for
//! colour shifting. Limited support for overstruck characters (such
//! as the LW circle (0o13 lower case) overstruck with logical or
//! (0o22 upper case)); these are currently supported only as Unicode
//! combining characters.
8//!
9//! The Xerox printer uses a different character set but this code
10//! doesn't currently include a mapping for it.
11//!
12//! Controlling documentation:
13//!
14//! - [Table 7-6 in the User
15//! Handbook](https://archive.org/details/tx-2-users-handbook-nov-63/page/n195)
16//! describes the Lincoln Writer codes.
17//! - [Table 7-5 in the User
18//! Handbook](https://archive.org/details/tx-2-users-handbook-nov-63/page/n195) describes the character codes for the Xerox printer. This code doesn't yet implement this mapping.
//! - [The Lincoln Keyboard - a typewriter keyboard designed for
//! computers input flexibility. A. Vanderburgh. Communications of
//! the ACM, Volume 1, Issue 7, July
//! 1958.](https://dl.acm.org/doi/10.1145/368873.368879) describes
//! the Lincoln Writer keyboard and the fact that some characters
//! do not advance the print carriage.
25//! - The Lincoln Lab Division 6 Quarterly Progress Report (15 June
26//! 1958).
//! - [The Lincoln Writer](https://apps.dtic.mil/sti/trecms/pdf/AD0235247.pdf).
//! J. T. Gilmore, Jr., R. E. Sewell. Lincoln Laboratory Group report
//! 51-8. October 6, 1959.
30use std::collections::HashMap;
31use std::error::Error;
32use std::fmt::{self, Display, Formatter};
33
34use super::{Unsigned6Bit, u6};
35
36#[cfg(test)]
37mod tests;
38
/// Error returned by [`subscript_char`] when a character has no known
/// Unicode subscript form. The payload is the character for which the
/// lookup failed.
///
/// `Eq` and `PartialEq` are derived (as they are for
/// `NoSuperscriptKnown`) so that results carrying this error can be
/// compared directly, for example in assertions.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct NoSubscriptKnown(char);

impl Display for NoSubscriptKnown {
    /// Writes a message naming the character which could not be
    /// converted to a subscript.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
        write!(
            f,
            "no subscript mapping is yet implemented for '{}'",
            self.0
        )
    }
}

impl Error for NoSubscriptKnown {}
53
54/// Return the corresponding subscript representation for `ch`.
55///
56/// # Errors
57/// `NoSubscriptKnown` when no corresponding subscript is known,
58pub const fn subscript_char(ch: char) -> Result<char, NoSubscriptKnown> {
59 // The cases here are ordered so as to make it obvious when an
60 // item is missing, and so we expect that some of the failure
61 // cases will have the same bodies.
62 match ch {
63 '0' => Ok('\u{2080}'), // ₀
64 '1' => Ok('\u{2081}'), // ₁
65 '2' => Ok('\u{2082}'), // ₂
66 '3' => Ok('\u{2083}'), // ₃
67 '4' => Ok('\u{2084}'), // ₄
68 '5' => Ok('\u{2085}'), // ₅
69 '6' => Ok('\u{2086}'), // ₆
70 '7' => Ok('\u{2087}'), // ₇
71 '8' => Ok('\u{2088}'), // ₈
72 '9' => Ok('\u{2089}'), // ₉
73 '+' => Ok('\u{208A}'), // '₊'
74 '-' => Ok('\u{208B}'), // ₋
75 '.' => Ok('.'), // there appears to be no subscript version
76 _ => Err(NoSubscriptKnown(ch)),
77 }
78}
79
/// Error returned by [`superscript_char`] when a character has no
/// known Unicode superscript form. The payload is the character for
/// which the lookup failed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct NoSuperscriptKnown(char);

impl Display for NoSuperscriptKnown {
    /// Writes a message naming the character which could not be
    /// converted to a superscript.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self(unmapped) = *self;
        write!(
            f,
            "no superscript mapping is yet implemented for '{}'",
            unmapped
        )
    }
}

impl Error for NoSuperscriptKnown {}
94
95/// Return the corresponding superscript representation for `ch`.
96///
97/// # Errors
98/// `NoSuperscriptKnown` when no corresponding superscript is known,
99pub fn superscript_char(ch: char) -> Result<char, NoSuperscriptKnown> {
100 // The cases here are ordered so as to make it obvious when an
101 // item is missing, and so we expect that some of the failure
102 // cases will have the same bodies.
103 #[allow(clippy::match_same_arms)]
104 match ch {
105 '0' => Ok('\u{2070}'),
106 '1' => Ok('\u{00B9}'),
107 '2' => Ok('\u{00B2}'),
108 '3' => Ok('\u{00B3}'),
109 '4' => Ok('\u{2074}'),
110 '5' => Ok('\u{2075}'),
111 '6' => Ok('\u{2076}'),
112 '7' => Ok('\u{2077}'),
113 '8' => Ok('\u{2078}'),
114 '9' => Ok('\u{2079}'),
115 'A' => Ok('ᴬ'),
116 'B' => Ok('ᴮ'),
117 'C' => Ok('\u{A7F2}'),
118 'D' => Ok('ᴰ'),
119 'E' => Ok('ᴱ'),
120 'F' => Ok('\u{A7F3}'),
121 'G' => Ok('ᴳ'),
122 'H' => Ok('ᴴ'),
123 'I' => Ok('ᴵ'),
124 'J' => Ok('ᴶ'),
125 'K' => Ok('ᴷ'),
126 'L' => Ok('ᴸ'),
127 'M' => Ok('ᴹ'),
128 'N' => Ok('ᴺ'),
129 'O' => Ok('ᴼ'),
130 'P' => Ok('ᴾ'),
131 'Q' => Ok('\u{A7F4}'),
132 'R' => Ok('ᴿ'),
133 // There is no Unicode superscript 'S', U+2E2 is a superscript 's'.
134 'T' => Ok('ᵀ'),
135 'U' => Ok('ᵁ'),
136 'V' => Ok('ⱽ'),
137 'W' => Ok('ᵂ'),
138 'X' => Ok('\u{2093}'),
139 'Y' | 'Z' => Err(NoSuperscriptKnown(ch)),
140 '+' => Ok('\u{207A}'),
141 '-' => Ok('\u{207B}'),
142 _ => Err(NoSuperscriptKnown(ch)),
143 }
144}
145
146impl Display for LincolnToUnicodeStrictConversionFailure {
147 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
148 match self {
149 LincolnToUnicodeStrictConversionFailure::CannotSubscript(
150 _,
151 LincolnChar::Unprintable(n),
152 )
153 | LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
154 _,
155 LincolnChar::Unprintable(n),
156 ) => {
157 write!(
158 f,
159 "cannot convert code {n:#o} from Lincoln Writer character set to Unicode, because it has no printable representation",
160 )
161 }
162 LincolnToUnicodeStrictConversionFailure::CannotSubscript(
163 u,
164 LincolnChar::UnicodeBaseChar(ch),
165 ) => {
166 write!(
167 f,
168 "cannot convert {u:#o} from Lincoln Writer character set to Unicode, because Unicode has no subscript form of '{ch}'",
169 )
170 }
171 LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
172 u,
173 LincolnChar::UnicodeBaseChar(ch),
174 ) => {
175 write!(
176 f,
177 "cannot convert {u:#o} from Lincoln Writer character set to Unicode, because Unicode has no superscript form of '{ch}'",
178 )
179 }
180 }
181 }
182}
183
/// The script (vertical position) in which a character is written.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Script {
    Normal,
    Super,
    Sub,
}

impl Script {
    /// Returns the shift (a bit count) associated with this script.
    ///
    /// Superscript values are configuration values (shift 30),
    /// subscript values are index values (shift 18) and normal-script
    /// values (for example an address) are not shifted at all.
    #[must_use]
    pub fn shift(&self) -> u32 {
        match *self {
            Script::Normal => 0, // e.g. an address value
            Script::Sub => 18,   // This is an index value
            Script::Super => 30, // This is a config value.
        }
    }
}
201
/// The colour currently selected on the Lincoln Writer.
///
/// Code 0o63 (COLOR BLACK) selects `Black` and code 0o67 (COLOR RED)
/// selects `Red`; see `lincoln_writer_state_update`. The default
/// state uses `Black`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Colour {
    /// Selected by code 0o63.
    Black,
    /// Selected by code 0o67.
    Red,
}
207
/// The keyboard case which is currently selected.
///
/// Lincoln Writer terminology is confusing here because of the way
/// the machine is built. Page 8 of "The Lincoln Writer" (Lincoln
/// Laboratory Group Report 51-8) explains:
///
/// > The keyboard is actually two separate Soroban coding keyboards
/// > mounted on the same block. The lower keyboard contains the buttons
/// > for all the lower case characters and the typewriter
/// > functions. The upper board contains the buttons for upper case
/// > characters and a few special codes.
///
/// Note in particular that the "lower" case is the one holding the
/// capital letters.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum LwKeyboardCase {
    /// Lower keyboard case (which contains capital letters)
    Lower,
    /// Upper keyboard case (which contains small letters, Greek
    /// letters, etc.)
    Upper,
}

impl LwKeyboardCase {
    /// Name of the keyboard case as a lower-case English word.
    fn as_str(self) -> &'static str {
        if matches!(self, LwKeyboardCase::Upper) {
            "upper"
        } else {
            "lower"
        }
    }
}
235
/// The shift state of a Lincoln Writer: which script, keyboard case
/// and colour are currently selected. The state is changed by the
/// codes handled in `lincoln_writer_state_update`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct LincolnState {
    /// Normal, superscript or subscript.
    pub script: Script,
    /// The selected keyboard case. Note that `LwKeyboardCase::Lower`
    /// is the case which contains the capital letters.
    pub case: LwKeyboardCase,
    /// The selected colour.
    pub colour: Colour,
}
242
/// Human-readable names for each component of a `LincolnState`, as
/// produced by the `From<&LincolnState>` implementation.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct LincolnStateTextInfo {
    /// Name of the script ("Normal script", "Superscript" or
    /// "Subscript").
    pub script: &'static str,
    /// Name of the keyboard case ("lower" or "upper").
    pub case: &'static str,
    /// Name of the colour ("Black" or "Red").
    pub colour: &'static str,
}
249
250impl Default for LincolnState {
251 fn default() -> Self {
252 // Carriage return sets the LW to lower case (which contains
253 // the capital letters!) and normal script so those are the
254 // defaults. See pages 4-37 to 4-42 of the User Handbook, and
255 // page 8 of "The Lincoln Writer" (Lincoln Laboratory Group
256 // Report 51-8).
257 Self {
258 script: Script::Normal,
259 case: LwKeyboardCase::Lower,
260 colour: Colour::Black,
261 }
262 }
263}
264
265impl LincolnState {
266 /// CARRIAGE RETURN also has the side effect of setting the
267 /// "keyboard" to lower case (i.e. capital letters!) and "normal
268 /// script". This statement appears in the description if the
269 /// Lincoln Writer in the Users Handbook (page 4-37 and again on
270 /// 4-41). The document explicitly states that a write of this
271 /// code (from the TX-2 to the Lincoln Writer) also affects the
272 /// state of the keyboard. On page 4-41 the document also states
273 /// that carriage return written by the TX-2 to the Lincoln Writer
274 /// has the same effect.
275 ///
276 /// Page 8 of "The Lincoln Writer" (Lincoln Laboratory Group
277 /// Report 51-8) points out that the "lower case" has the capital
278 /// letters on it.
279 ///
280 /// XXX: both of the previous two statements describe the TX2->LW
281 /// direction, re-check the documentation for what happens in the
282 /// other direction.
283 fn on_carriage_return(&mut self) {
284 self.script = Script::Normal;
285 self.case = LwKeyboardCase::Lower;
286 }
287}
288
289impl From<&LincolnState> for LincolnStateTextInfo {
290 fn from(state: &LincolnState) -> LincolnStateTextInfo {
291 LincolnStateTextInfo {
292 script: match state.script {
293 Script::Normal => "Normal script",
294 Script::Super => "Superscript",
295 Script::Sub => "Subscript",
296 },
297 case: state.case.as_str(),
298 colour: match state.colour {
299 Colour::Black => "Black",
300 Colour::Red => "Red",
301 },
302 }
303 }
304}
305
/// The "base" identity of a Lincoln Writer character, ignoring
/// script and colour: either something with a Unicode equivalent, or
/// a code which prints nothing.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LincolnChar {
    /// There is a Unicode character which we're trying to print (but
    /// this actual Unicode character may be incorrect, i.e. it is
    /// normal-script when the fully-described character is actually
    /// superscript).
    UnicodeBaseChar(char),
    /// Unprintable chars include YES, READ IN, BEGIN, NO, and so forth.
    /// The payload is the Lincoln Writer code itself.
    Unprintable(Unsigned6Bit),
}
316
/// A full description of one Lincoln Writer character: what it is,
/// how (and whether) it can be shown in Unicode, and the Lincoln
/// Writer state in which it was produced.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct DescribedChar {
    /// The actual character we're trying to print. The `attributes`
    /// attribute specifies whether this is a subscript, superscript
    /// or normal character, and what colour it is.
    pub base_char: LincolnChar,
    /// If the character has a direct Unicode translation, that is in
    /// `unicode_representation`. Some characters, for example
    /// superscript Y, have no Unicode representation.
    pub unicode_representation: Option<char>,
    /// The Lincoln Writer state which applies to this character:
    /// whether it is subscript, superscript, or normal, what colour
    /// it is, and which keyboard case was selected. Note that the
    /// case recorded here is the Lincoln Writer keyboard case
    /// (`LwKeyboardCase`), in which `Lower` is the case containing
    /// the capital letters; it is not the typographic case of the
    /// character.
    pub attributes: LincolnState,
    /// When advance is `true`, printing this character should advance
    /// the printing position.
    pub advance: bool,
    /// Indicates whether the label on the Lincoln Writer keyboard
    /// is the same as the Unicode representation.
    pub label_matches_unicode: bool,
}
339
340fn unprintable(c: Unsigned6Bit, state: LincolnState) -> DescribedChar {
341 DescribedChar {
342 base_char: LincolnChar::Unprintable(c),
343 unicode_representation: None,
344 attributes: state,
345 advance: false,
346 label_matches_unicode: false,
347 }
348}
349const fn bycase(lower: char, upper: char, state: LincolnState) -> char {
350 match state.case {
351 LwKeyboardCase::Upper => upper,
352 LwKeyboardCase::Lower => lower,
353 }
354}
355
356/// Perform any state changes implied by a character code.
357pub fn lincoln_writer_state_update(lin_ch: Unsigned6Bit, state: &mut LincolnState) {
358 match u8::from(lin_ch) {
359 0o60 => {
360 state.on_carriage_return();
361 }
362 0o63 => {
363 state.colour = Colour::Black;
364 }
365 0o64 => {
366 state.script = Script::Super;
367 }
368 0o65 => {
369 state.script = Script::Normal;
370 }
371 0o66 => {
372 state.script = Script::Sub;
373 }
374 0o67 => {
375 state.colour = Colour::Red;
376 }
377 0o74 => {
378 state.case = LwKeyboardCase::Lower;
379 }
380 0o75 => {
381 state.case = LwKeyboardCase::Upper;
382 }
383 _ => (),
384 }
385}
386
/// Convert a Lincoln Writer character to a description which can
/// be used to print a Unicode approximation of it.
///
/// `state` is updated first (see `lincoln_writer_state_update`), so
/// the `attributes` of the returned description reflect the state
/// *after* any change made by `lin_ch` itself.
///
/// In the success case we return None when the only effect of this
/// Lincoln Writer character is to change mode (e.g. to upper case)
/// and `Some(DescribedChar)` when there is something to print. In
/// the `Some(DescribedChar)` case, the `DescribedChar` instance
/// describes what is to be printed and provides a Unicode
/// approximation to it, if there is one. Codes which print nothing
/// but are not mode changes (READ IN, BEGIN, NO, YES, WORD EXAM and
/// STOP) are returned as `LincolnChar::Unprintable`.
///
/// The character codes are shown in table 7-6 in the Users handbook.
/// This shows two columns of characters for each code. Somewhat
/// counterintuitively, I believe that the left-hand column is "lower
/// case". Hence for code 027 for example, 'H' is "lower case" and
/// "x" is upper case. I believe this for the following reasons:
///
/// 1. because the LW defaults to "lower case" after Carriage Return,
///    and we'd expect this to correspond to the most commonly used
///    characters. The block capitals and digits are all in the
///    left-hand column. There is a complete set of A-Z but there is
///    not a complete set of a-z.
/// 2. The layout of the Lincoln Writer keyboard is consistent with
///    this idea. There are two keyboards, an upper and a lower. The
///    lower keyboard contains block capitals and digits, and the
///    upper keyboard contains minuscule letters (e.g. "q", "k").
///    This idea is based on the Lincoln Writer diagram on page 24 of
///    the Lincoln Lab Division 6 Quarterly Progress Report (15 June
///    1958). Figure 9 in the later (1959-10-06) document Group
///    Report 51-8 (a photograph) is mostly consistent but shows the
///    CONTINUE and HALT keys to have been removed and LINE FEED UP
///    and LINE FEED DOWN have been added.
/// 3. Page 8 of "The Lincoln Writer" (Lincoln Lab Group Report 51-8)
///    says: The lower case keyboard was almost standard (our capital
///    letters were put on the lower case).
///
/// # Panics
///
/// Panics if `u8::from(lin_ch)` is greater than 0o77, which should
/// not be possible for a 6-bit value.
pub fn lincoln_char_to_described_char(
    lin_ch: Unsigned6Bit,
    state: &mut LincolnState,
) -> Option<DescribedChar> {
    // Apply mode changes before describing the character, so that
    // everything below sees the state this code leaves behind.
    lincoln_writer_state_update(lin_ch, state);
    // Codes 0o12 and 0o13 are the only ones which do not advance the
    // carriage.
    let advance: bool = lin_ch != 0o12 && lin_ch != 0o13;
    // Selects the lower- or upper-case character of a key according
    // to the (already updated) keyboard case.
    let by_case = |lower, upper: char| -> Option<char> { Some(bycase(lower, upper, *state)) };

    // It's more important for the cases to be in numerical order than
    // it is to avoid identical bodies.
    #[allow(clippy::match_same_arms)]
    let base_char: Option<char> = match u8::from(lin_ch) {
        0o00 => by_case('0', '☛'), // \U261B, black hand pointing right
        0o01 => by_case('1', 'Σ'), // \U03A3, Greek capital letter Sigma
        0o02 => by_case('2', '|'),
        0o03 => by_case('3', '‖'), // \U2016, double vertical line
        0o04 => by_case('4', '/'),
        0o05 => by_case('5', '×'), // multiplication sign (U+00D7)
        0o06 => by_case('6', '#'),
        0o07 => by_case('7', '→'), // rightwards arrow (U+2192)
        0o10 => by_case('8', '<'),
        0o11 => by_case('9', '>'),
        0o12 => {
            // These characters do not advance the carriage. Hence we
            // translate the lower-case 0o12 into Unicode 'combining
            // low line' rather than underscore.
            by_case(
                '\u{0332}', // combining low line
                '\u{0305}', // combining overline
            )
        }
        0o13 => {
            // These characters do not advance the carriage.
            by_case(
                '\u{20DD}', // combining enclosing circle
                '\u{20DE}', // combining enclosing square
            )
        }
        0o14..=0o17 => return Some(unprintable(lin_ch, *state)), // "READ IN", "BEGIN", "NO", "YES"
        0o20 => by_case('A', 'n'),
        0o21 => by_case('B', '⊂'), // Subset of (U+2282)
        0o22 => by_case('C', '∨'), // Logical or (U+2228)
        0o23 => by_case('D', 'q'),
        0o24 => by_case('E', 'γ'), // Greek small letter gamma (U+03B3)
        0o25 => by_case('F', 't'),
        0o26 => by_case('G', 'w'),
        0o27 => by_case('H', 'x'),
        0o30 => by_case('I', 'i'),
        0o31 => by_case('J', 'y'),
        0o32 => by_case('K', 'z'),
        0o33 => by_case('L', '?'),
        0o34 => by_case('M', '∪'), // Union, U+222A
        0o35 => by_case('N', '∩'), // Intersection, U+2229
        0o36 => by_case('O', 'j'),
        0o37 => by_case('P', 'k'),
        0o40 => by_case('Q', 'α'), // Greek small letter alpha, U+03B1
        0o41 => by_case('R', 'Δ'), // Greek capital delta, U+0394
        0o42 => by_case('S', 'p'),
        // Previously we thought that the right-hand character was ∈
        // (Element of, U+2208), but seeing the greek letters grouped
        // in section 6-2.3 ("RULES FOR SYMEX FORMATION") shows that
        // this is a greek letter, epsilon.
        0o43 => by_case('T', 'ε'), // Epsilon
        0o44 => by_case('U', 'h'),
        0o45 => by_case('V', '⊃'), // Superset of, U+2283
        0o46 => by_case('W', 'β'), // Greek beta symbol, U+03B2
        0o47 => by_case('X', '∧'), // Logical And U+2227
        0o50 => by_case('Y', 'λ'), // Greek small letter lambda, U+03BB
        0o51 => by_case('Z', '~'),
        0o52 => by_case('(', '{'),
        0o53 => by_case(')', '}'),
        0o54 => by_case('+', '≡'), // Identical to, U+2261
        0o55 => by_case('-', '='),
        0o56 => by_case(',', '\u{0027}'), // Single apostrophe, U+0027
        0o57 => by_case('.', '*'),
        0o60 => {
            // Despite the state change, on input only the 060 is
            // emitted by the Lincoln Writer. Carriage Return also
            // advances the paper (i.e. performs a line feed).
            Some('\r') // state change was already done.
        }
        0o61 => Some('\t'),
        0o62 => Some('\u{0008}'), // backspace, U+0008
        0o63 => None,             // COLOR BLACK; state change already done
        0o64 => None,             // SUPER; state change already done
        0o65 => None,             // NORMAL; state change already done
        0o66 => None,             // SUB; state change already done
        0o67 => None,             // COLOR RED; state change already done
        0o70 => Some(' '),        // space
        0o71 => return Some(unprintable(lin_ch, *state)), // WORD EXAM
        0o72 => Some('\n'),       // LINE FEED UP
        0o73 => Some('\u{008D}'), // LINE FEED DOWN
        0o74 => None,             // LOWER CASE; state change already done
        0o75 => None,             // UPPER CASE; state change already done
        0o76 => return Some(unprintable(lin_ch, *state)), // STOP
        0o77 => {
            // Supposedly NULLIFY. It's used on paper tape as a way
            // to delete a character. Punching out all the bit holes
            // changes the code to 0o77 and applications supposedly
            // ignore these characters on the basis that the user has
            // deleted them.
            //
            // For example suppose the user presses 'Q' followed by
            // 'DELETE'.
            //
            // In off-line mode, where the LW is being used only to
            // prepare a paper tape the TX-2 doesn't directly see the
            // codes. The tape will be punched with code 0o40
            // (representing 'Q') and then the same location will be
            // re-punched with 0o77 (effectively deleting the 'Q').
            // Later when the paper tape is read, the only code the
            // machine will see is the 0o77 (assuming that there was
            // no previous upper/lower case change code).
            //
            // In on-line mode the TX-2 will see two codes, 0o40
            // followed by 0o77; the Lincoln Writer cannot "un-send"
            // the 0o40. This is the same behaviour as modern
            // computers have for DELETE. Therefore we map this code
            // to ASCII DEL.
            Some('\u{007F}')
        }
        _ => unreachable!("All Unsigned6Bit values should have been handled"),
    };

    if let Some(base) = base_char {
        // Apply the current script. When no subscript/superscript
        // form is known, `display` is None even though there is a
        // base character.
        let display = match state.script {
            Script::Normal => Some(base),
            Script::Sub => subscript_char(base).ok(),
            Script::Super => superscript_char(base).ok(),
        };
        // Non-carriage-advancing characters don't strictly match the
        // key label, because we represent them as combining
        // characters and so there's a space in the key label too.
        let label_matches_unicode = if !advance {
            false
        } else {
            #[allow(clippy::match_same_arms)]
            match display {
                None => false,
                Some(' ') => {
                    // Here the mapping is to ' ' but in the keyboard
                    // implementation, the space bar's label is the
                    // zero-length string.
                    false
                }
                // Control characters: none of these is what is
                // written on the corresponding key.
                Some('\n' | '\r' | '\t' | '\u{0008}' | '\u{008D}' | '\u{007F}') => false,
                Some('☛') => {
                    // On the keyboard we label this with '☞' (Unicode
                    // U+261E) instead of '☛'(U+261B) because the
                    // outline looks more readable on the drawn
                    // keyboard. So these don't match.
                    false
                }
                Some(_) => true,
            }
        };
        Some(DescribedChar {
            base_char: LincolnChar::UnicodeBaseChar(base),
            unicode_representation: display,
            attributes: *state,
            advance,
            label_matches_unicode,
        })
    } else {
        // The code was purely a mode change; there is nothing to print.
        None
    }
}
588
/// Reason why `lincoln_to_unicode_strict` could not convert a
/// Lincoln Writer code to Unicode. Each variant carries the
/// offending code and its base character.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LincolnToUnicodeStrictConversionFailure {
    /// The character was in subscript but no Unicode subscript form
    /// of it is known.
    CannotSubscript(Unsigned6Bit, LincolnChar),
    /// The character was in superscript but no Unicode superscript
    /// form of it is known.
    CannotSuperscript(Unsigned6Bit, LincolnChar),
}
594
595/// Convert a stream of Lincoln Writer codes to a Unicode string.
596/// Lincoln Writer codes are 6 bits, and these are assumed to be in
597/// the lower 6 bits of the input values.
598///
599/// # Errors
600///
601/// If an input character is printable on the Lincoln Writer
602/// (i.e. would make a mark on the paper) but has no Unicode
603/// representation (e.g. because the LW is in superscript mode and
604/// there is no Unicode superscript character to represent the
605/// incoming LW character) then
606/// `Err(LincolnToUnicodeStrictConversionFailure)` is returned.
607pub fn lincoln_to_unicode_strict(
608 input: &[Unsigned6Bit],
609) -> Result<String, LincolnToUnicodeStrictConversionFailure> {
610 let mut result = String::with_capacity(input.len());
611 let mut state: LincolnState = LincolnState::default();
612 for byte in input {
613 match lincoln_char_to_described_char(*byte, &mut state) {
614 Some(DescribedChar {
615 base_char: LincolnChar::Unprintable(_),
616 ..
617 }) => {
618 // Codes like "YES" are handled here. When printed on
619 // the Lincoln Writer, no character is printed (though
620 // some time is taken to not print it).
621 //
622 // We do nothing (i.e. generate no error and no output
623 // character).
624 }
625 Some(DescribedChar {
626 base_char: _,
627 unicode_representation: Some(display),
628 attributes: _,
629 advance: _,
630 label_matches_unicode: _,
631 }) => {
632 result.push(display);
633 }
634 Some(DescribedChar {
635 base_char,
636 unicode_representation: None,
637 attributes,
638 advance: _,
639 label_matches_unicode: _,
640 }) => match attributes.script {
641 Script::Normal => unreachable!(),
642 Script::Sub => {
643 return Err(LincolnToUnicodeStrictConversionFailure::CannotSubscript(
644 *byte, base_char,
645 ));
646 }
647 Script::Super => {
648 return Err(LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
649 *byte, base_char,
650 ));
651 }
652 },
653 None => (),
654 }
655 }
656 Ok(result)
657}
658
/// A Lincoln Writer code together with the Lincoln Writer state
/// associated with it. This is the value type of the table held by
/// `UnicodeToLincolnMapping`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct LincChar {
    /// The script, case and colour in which `value` yields the
    /// mapped Unicode character.
    state: LincolnState,
    /// The 6-bit Lincoln Writer code.
    value: Unsigned6Bit,
}
664
/// A lookup table for converting Unicode text to Lincoln Writer
/// codes. Build it with `UnicodeToLincolnMapping::new()` and convert
/// strings with `to_lincoln`.
pub struct UnicodeToLincolnMapping {
    /// Maps each supported Unicode character to the Lincoln Writer
    /// code (and required state) which produces it.
    m: HashMap<char, LincChar>,
}
668
/// Reason why a Unicode string could not be converted to Lincoln
/// Writer codes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UnicodeToLincolnConversionFailure {
    /// The contained character has no Lincoln Writer equivalent.
    NoMapping(char),
}

impl Display for UnicodeToLincolnConversionFailure {
    /// Writes a message naming the character which has no mapping.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
        match self {
            UnicodeToLincolnConversionFailure::NoMapping(ch) => {
                write!(
                    f,
                    "there is no mapping for '{ch}' from Unicode to Lincoln Writer character set",
                )
            }
        }
    }
}

// Implementing `Error` lets callers propagate this failure with `?`
// into `Box<dyn Error>` and similar, as is already possible for the
// other error types in this module.
impl Error for UnicodeToLincolnConversionFailure {}
686
687impl UnicodeToLincolnMapping {
688 #[must_use]
689 pub fn new() -> UnicodeToLincolnMapping {
690 let mut m: HashMap<char, LincChar> = HashMap::new();
691 for script in [Script::Normal, Script::Super, Script::Sub] {
692 for case in [LwKeyboardCase::Lower, LwKeyboardCase::Upper] {
693 for value in 0..=0o77 {
694 if let Ok(ch) = Unsigned6Bit::try_from(value) {
695 let mut state = LincolnState {
696 script,
697 case,
698 colour: Colour::Black,
699 };
700 if let Some(DescribedChar {
701 base_char: _,
702 unicode_representation: Some(display),
703 attributes: _,
704 advance: _,
705 label_matches_unicode: _,
706 }) = lincoln_char_to_described_char(ch, &mut state)
707 {
708 m.insert(display, LincChar { state, value: ch });
709 }
710 } else {
711 continue;
712 }
713 }
714 }
715 }
716 UnicodeToLincolnMapping { m }
717 }
718
719 /// Convert a Unicode string to a sequence of Lincoln Writer codes.
720 ///
721 /// # Errors
722 ///
723 /// `Err(UnicodeToLincolnconversionfailure)` is returned when one
724 /// of the Unicode characters in the input cannot be converted to
725 /// a Lincoln Writer code.
726 pub fn to_lincoln(
727 &self,
728 s: &str,
729 ) -> Result<Vec<Unsigned6Bit>, UnicodeToLincolnConversionFailure> {
730 let mut result: Vec<Unsigned6Bit> = Vec::with_capacity(s.len());
731 let mut current_case: Option<LwKeyboardCase> = None;
732 let mut current_script: Option<Script> = None;
733
734 for ch in s.chars() {
735 match self.m.get(&ch) {
736 None => {
737 return Err(UnicodeToLincolnConversionFailure::NoMapping(ch));
738 }
739 Some(lch) => {
740 if Some(lch.state.case) == current_case {
741 // Nothing to do
742 } else {
743 result.push(match lch.state.case {
744 LwKeyboardCase::Upper => u6!(0o75),
745 LwKeyboardCase::Lower => u6!(0o74),
746 });
747 current_case = Some(lch.state.case);
748 }
749
750 if Some(lch.state.script) == current_script {
751 // Nothing to do
752 } else {
753 result.push(match lch.state.script {
754 Script::Super => u6!(0o64),
755 Script::Normal => u6!(0o65),
756 Script::Sub => u6!(0o66),
757 });
758 current_script = Some(lch.state.script);
759 }
760
761 result.push(lch.value);
762 }
763 }
764 }
765 Ok(result)
766 }
767}
768
769impl Default for UnicodeToLincolnMapping {
770 fn default() -> UnicodeToLincolnMapping {
771 UnicodeToLincolnMapping::new()
772 }
773}