base/
charset.rs

1//! Character set conversions.
2//!
3//! Unicode to and from Lincoln Writer characters.  No support for
//! colour shifting.  Limited support for overstrike characters (such
//! as the LW circle (0o13 lower case) overstruck with logical or
//! (0o22 upper case)); these are currently supported only as Unicode
//! combining characters.
8//!
9//! The Xerox printer uses a different character set but this code
10//! doesn't currently include a mapping for it.
11//!
12//! Controlling documentation:
13//!
14//! - [Table 7-6 in the User
15//!   Handbook](https://archive.org/details/tx-2-users-handbook-nov-63/page/n195)
16//!   describes the Lincoln Writer codes.
17//! - [Table 7-5 in the User
18//!   Handbook](https://archive.org/details/tx-2-users-handbook-nov-63/page/n195) describes the character codes for the Xerox printer.  This code doesn't yet implement this mapping.
19//! - [The Lincoln Keyboard - a typewriter keyboard designed for
20//!   computers imput flexibility. A. Vanderburgh.  Communications of
21//!   the ACM, Volume 1, Issue 7, July
22//!   1958.](https://dl.acm.org/doi/10.1145/368873.368879) describes
23//!   the Lincoln Writer keyboard and the fact that some characters
24//!   do not advance the print carriage.
25//! - The Lincoln Lab Division 6 Quarterly Progress Report (15 June
26//!   1958).
27//! - [The Lincoln Writer](https://apps.dtic.mil/sti/trecms/pdf/AD0235247.pdf).
//!   J. T. Gilmore, Jr., R. E. Sewell.  Lincoln Laboratory Group report
29//!   51-8.  October 6, 1959.
30use std::collections::HashMap;
31use std::error::Error;
32use std::fmt::{self, Display, Formatter};
33
34use super::{Unsigned6Bit, u6};
35
36#[cfg(test)]
37mod tests;
38
/// Error reported by [`subscript_char`] when it has no subscript
/// mapping for a character.  The payload is the character for which
/// the mapping was requested.
///
/// Derives `Eq`/`PartialEq` (like `NoSuperscriptKnown`) so that
/// results carrying this error can be compared directly.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct NoSubscriptKnown(char);

impl Display for NoSubscriptKnown {
    /// Formats a human-readable message naming the unmapped character.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self(ch) = self;
        write!(f, "no subscript mapping is yet implemented for '{ch}'")
    }
}

impl Error for NoSubscriptKnown {}
53
54/// Return the corresponding subscript representation for `ch`.
55///
56/// # Errors
57/// `NoSubscriptKnown` when no corresponding subscript is known,
58pub const fn subscript_char(ch: char) -> Result<char, NoSubscriptKnown> {
59    // The cases here are ordered so as to make it obvious when an
60    // item is missing, and so we expect that some of the failure
61    // cases will have the same bodies.
62    match ch {
63        '0' => Ok('\u{2080}'), // ₀
64        '1' => Ok('\u{2081}'), // ₁
65        '2' => Ok('\u{2082}'), // ₂
66        '3' => Ok('\u{2083}'), // ₃
67        '4' => Ok('\u{2084}'), // ₄
68        '5' => Ok('\u{2085}'), // ₅
69        '6' => Ok('\u{2086}'), // ₆
70        '7' => Ok('\u{2087}'), // ₇
71        '8' => Ok('\u{2088}'), // ₈
72        '9' => Ok('\u{2089}'), // ₉
73        '+' => Ok('\u{208A}'), // '₊'
74        '-' => Ok('\u{208B}'), // ₋
75        '.' => Ok('.'),        // there appears to be no subscript version
76        _ => Err(NoSubscriptKnown(ch)),
77    }
78}
79
/// Error reported by [`superscript_char`] when it has no superscript
/// mapping for a character.  The payload is the character for which
/// the mapping was requested.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct NoSuperscriptKnown(char);

impl Display for NoSuperscriptKnown {
    /// Formats a human-readable message naming the unmapped character.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self(ch) = self;
        write!(f, "no superscript mapping is yet implemented for '{ch}'")
    }
}

impl Error for NoSuperscriptKnown {}
94
95/// Return the corresponding superscript representation for `ch`.
96///
97/// # Errors
98/// `NoSuperscriptKnown` when no corresponding superscript is known,
99pub fn superscript_char(ch: char) -> Result<char, NoSuperscriptKnown> {
100    // The cases here are ordered so as to make it obvious when an
101    // item is missing, and so we expect that some of the failure
102    // cases will have the same bodies.
103    #[allow(clippy::match_same_arms)]
104    match ch {
105        '0' => Ok('\u{2070}'),
106        '1' => Ok('\u{00B9}'),
107        '2' => Ok('\u{00B2}'),
108        '3' => Ok('\u{00B3}'),
109        '4' => Ok('\u{2074}'),
110        '5' => Ok('\u{2075}'),
111        '6' => Ok('\u{2076}'),
112        '7' => Ok('\u{2077}'),
113        '8' => Ok('\u{2078}'),
114        '9' => Ok('\u{2079}'),
115        'A' => Ok('ᴬ'),
116        'B' => Ok('ᴮ'),
117        'C' => Ok('\u{A7F2}'),
118        'D' => Ok('ᴰ'),
119        'E' => Ok('ᴱ'),
120        'F' => Ok('\u{A7F3}'),
121        'G' => Ok('ᴳ'),
122        'H' => Ok('ᴴ'),
123        'I' => Ok('ᴵ'),
124        'J' => Ok('ᴶ'),
125        'K' => Ok('ᴷ'),
126        'L' => Ok('ᴸ'),
127        'M' => Ok('ᴹ'),
128        'N' => Ok('ᴺ'),
129        'O' => Ok('ᴼ'),
130        'P' => Ok('ᴾ'),
131        'Q' => Ok('\u{A7F4}'),
132        'R' => Ok('ᴿ'),
133        // There is no Unicode superscript 'S', U+2E2 is a superscript 's'.
134        'T' => Ok('ᵀ'),
135        'U' => Ok('ᵁ'),
136        'V' => Ok('ⱽ'),
137        'W' => Ok('ᵂ'),
138        'X' => Ok('\u{2093}'),
139        'Y' | 'Z' => Err(NoSuperscriptKnown(ch)),
140        '+' => Ok('\u{207A}'),
141        '-' => Ok('\u{207B}'),
142        _ => Err(NoSuperscriptKnown(ch)),
143    }
144}
145
146impl Display for LincolnToUnicodeStrictConversionFailure {
147    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
148        match self {
149            LincolnToUnicodeStrictConversionFailure::CannotSubscript(
150                _,
151                LincolnChar::Unprintable(n),
152            )
153            | LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
154                _,
155                LincolnChar::Unprintable(n),
156            ) => {
157                write!(
158                    f,
159                    "cannot convert code {n:#o} from Lincoln Writer character set to Unicode, because it has no printable representation",
160                )
161            }
162            LincolnToUnicodeStrictConversionFailure::CannotSubscript(
163                u,
164                LincolnChar::UnicodeBaseChar(ch),
165            ) => {
166                write!(
167                    f,
168                    "cannot convert {u:#o} from Lincoln Writer character set to Unicode, because Unicode has no subscript form of '{ch}'",
169                )
170            }
171            LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
172                u,
173                LincolnChar::UnicodeBaseChar(ch),
174            ) => {
175                write!(
176                    f,
177                    "cannot convert {u:#o} from Lincoln Writer character set to Unicode, because Unicode has no superscript form of '{ch}'",
178                )
179            }
180        }
181    }
182}
183
/// The script (vertical position) in which a character is printed:
/// normal, superscript or subscript.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Script {
    Normal,
    Super,
    Sub,
}

impl Script {
    /// Returns the shift count associated with this script: 0 for
    /// normal script, 18 for subscript and 30 for superscript.
    ///
    /// NOTE(review): presumably these are bit offsets of the
    /// address, index and configuration fields of a word; confirm
    /// against the callers.
    #[must_use]
    pub fn shift(&self) -> u32 {
        match *self {
            Self::Normal => 0, // e.g. an address value
            Self::Sub => 18,   // This is an index value
            Self::Super => 30, // This is a config value.
        }
    }
}
201
/// The ink colour currently selected on the Lincoln Writer.  Code
/// 0o63 (COLOR BLACK) selects `Black` and code 0o67 (COLOR RED)
/// selects `Red`; `Black` is the default state.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Colour {
    Black,
    Red,
}
207
/// Indicates which keyboard case is currently selected.  The
/// terminology used around the Lincoln Writer is very confusing
/// because of the way the LW is designed.  Specifically (per page 8
/// of "The Lincoln Writer", Lincoln Laboratory Group Report 51-8):
///
/// > The keyboard is actually two separate Soroban coding keyboards
/// > mounted on the same block. The lower keyboard contains the buttons
/// > for all the lower case characters and the typewriter
/// > functions. The upper board contains the buttons for upper case
/// > characters and a few special codes.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum LwKeyboardCase {
    /// Lower keyboard case (which contains capital letters)
    Lower,
    /// Upper keyboard case (which contains small letters, Greek
    /// letters, etc.)
    Upper,
}

impl LwKeyboardCase {
    /// Returns the name of the keyboard case as a lower-case word
    /// (`"lower"` or `"upper"`).
    fn as_str(self) -> &'static str {
        match self {
            Self::Upper => "upper",
            Self::Lower => "lower",
        }
    }
}
235
/// The mode state of a Lincoln Writer: which script, keyboard case
/// and colour are currently selected.  Mode-changing codes update
/// this state (see `lincoln_writer_state_update`).
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct LincolnState {
    /// Normal script, superscript or subscript.
    pub script: Script,
    /// The selected keyboard case.  Note that the "lower" keyboard
    /// case is the one containing the capital letters.
    pub case: LwKeyboardCase,
    /// The selected ink colour.
    pub colour: Colour,
}
242
/// A textual description of a `LincolnState`, one static string per
/// attribute.  Built with `From<&LincolnState>`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct LincolnStateTextInfo {
    /// "Normal script", "Superscript" or "Subscript".
    pub script: &'static str,
    /// "lower" or "upper" (the keyboard case).
    pub case: &'static str,
    /// "Black" or "Red".
    pub colour: &'static str,
}
249
250impl Default for LincolnState {
251    fn default() -> Self {
252        // Carriage return sets the LW to lower case (which contains
253        // the capital letters!) and normal script so those are the
254        // defaults.  See pages 4-37 to 4-42 of the User Handbook, and
255        // page 8 of "The Lincoln Writer" (Lincoln Laboratory Group
256        // Report 51-8).
257        Self {
258            script: Script::Normal,
259            case: LwKeyboardCase::Lower,
260            colour: Colour::Black,
261        }
262    }
263}
264
265impl LincolnState {
266    /// CARRIAGE RETURN also has the side effect of setting the
267    /// "keyboard" to lower case (i.e. capital letters!) and "normal
268    /// script".  This statement appears in the description if the
269    /// Lincoln Writer in the Users Handbook (page 4-37 and again on
270    /// 4-41).  The document explicitly states that a write of this
271    /// code (from the TX-2 to the Lincoln Writer) also affects the
272    /// state of the keyboard. On page 4-41 the document also states
273    /// that carriage return written by the TX-2 to the Lincoln Writer
274    /// has the same effect.
275    ///
276    /// Page 8 of "The Lincoln Writer" (Lincoln Laboratory Group
277    /// Report 51-8) points out that the "lower case" has the capital
278    /// letters on it.
279    ///
280    /// XXX: both of the previous two statements describe the TX2->LW
281    /// direction, re-check the documentation for what happens in the
282    /// other direction.
283    fn on_carriage_return(&mut self) {
284        self.script = Script::Normal;
285        self.case = LwKeyboardCase::Lower;
286    }
287}
288
289impl From<&LincolnState> for LincolnStateTextInfo {
290    fn from(state: &LincolnState) -> LincolnStateTextInfo {
291        LincolnStateTextInfo {
292            script: match state.script {
293                Script::Normal => "Normal script",
294                Script::Super => "Superscript",
295                Script::Sub => "Subscript",
296            },
297            case: state.case.as_str(),
298            colour: match state.colour {
299                Colour::Black => "Black",
300                Colour::Red => "Red",
301            },
302        }
303    }
304}
305
/// The character denoted by a Lincoln Writer code, before script
/// (superscript/subscript) is taken into account.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LincolnChar {
    /// There is a Unicode character which we're trying to print (but
    /// this actual Unicode character may be incorrect, i.e. it is
    /// normal-script when the fully-described character is actually
    /// superscript).
    UnicodeBaseChar(char),
    /// Unprintable chars include YES, READ IN, BEGIN, NO, and so forth.
    /// The payload is the Lincoln Writer code itself.
    Unprintable(Unsigned6Bit),
}
316
/// Describes what a Lincoln Writer code prints, together with a
/// Unicode approximation of it (when there is one).
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct DescribedChar {
    /// The actual character we're trying to print.  The `attributes`
    /// attribute specifies whether this is a subscript, superscript
    /// or normal character, and what colour it is.
    pub base_char: LincolnChar,
    /// If the character has a direct Unicode translation, that is in
    /// `unicode_representation`.  Some characters, for example
    /// superscript Y, have no Unicode representation.
    pub unicode_representation: Option<char>,
    /// The Lincoln Writer state in which the character is printed:
    /// the selected keyboard case, whether it is subscript,
    /// superscript, or normal, and what colour it is.  Note that the
    /// case here is the Lincoln Writer *keyboard* case, not the
    /// typographic case: capital letters such as "A" are on the
    /// lower keyboard case.
    pub attributes: LincolnState,
    /// When advance is `true`, printing this character should advance
    /// the printing position.
    pub advance: bool,
    /// Indicates whether the label on the Lincoln Writer keyboard
    /// is the same as the Unicode representation.
    pub label_matches_unicode: bool,
}
339
340fn unprintable(c: Unsigned6Bit, state: LincolnState) -> DescribedChar {
341    DescribedChar {
342        base_char: LincolnChar::Unprintable(c),
343        unicode_representation: None,
344        attributes: state,
345        advance: false,
346        label_matches_unicode: false,
347    }
348}
349const fn bycase(lower: char, upper: char, state: LincolnState) -> char {
350    match state.case {
351        LwKeyboardCase::Upper => upper,
352        LwKeyboardCase::Lower => lower,
353    }
354}
355
356/// Perform any state changes implied by a character code.
357pub fn lincoln_writer_state_update(lin_ch: Unsigned6Bit, state: &mut LincolnState) {
358    match u8::from(lin_ch) {
359        0o60 => {
360            state.on_carriage_return();
361        }
362        0o63 => {
363            state.colour = Colour::Black;
364        }
365        0o64 => {
366            state.script = Script::Super;
367        }
368        0o65 => {
369            state.script = Script::Normal;
370        }
371        0o66 => {
372            state.script = Script::Sub;
373        }
374        0o67 => {
375            state.colour = Colour::Red;
376        }
377        0o74 => {
378            state.case = LwKeyboardCase::Lower;
379        }
380        0o75 => {
381            state.case = LwKeyboardCase::Upper;
382        }
383        _ => (),
384    }
385}
386
387/// Convert a Lincoln Writer character to a description which can
388/// be used to print a Unicode approximation of it.
389///
390/// In the success case we return None when the only effect of this
391/// Lincoln Writer character is to change mode (e.g. to upper case)
392/// and `Some(DescribedChar)` when there is something to print.  In
393/// the `Some(DescribedChar)` case, the `DescribedChar` instance
394/// describes what is to be printed and provides a Unicode
395/// approximation to it, if there is one.
396///
397/// The character codes are shown in table 7-6 in the Users handbook.
398/// This shows two columns of characters for each code.  Somewhat
399/// counterintuitively, I believe that the left-hand column is "lower
400/// case".  hence for code 027 for example, 'H' is "lower case" and
401/// "x" is upper case.   I believe this for the following reasons:
402///
403/// 1. because the LW defaults to "lower case" after Carriage Return,
404///    and we'd expect this to correspond to the most commonly used
405///    characters.  The block capitals and digits are all in the
406///    left-hand column.  There is a complete set of A-Z but there is
407///    not a complete set of a-z.
408/// 2. The layout of the Lincoln Writer keyboard is consistent with
409///    this idea.  There are two keyboards, an upper and a lower.  The
410///    lower keyboard contains block capitals and digits, and the
411///    upper keyboard contains minuscule letters (e.g. "q", "k").
412///    This idea is based on the Lincoln Writer diagram on page 24 of
413///    the Lincoln Lab Division 6 Quarterly Progress Report (15 June
414///    1958).  Figure 9 in in the later (1959-10-06) document Group
415///    Report 51-8 (a photograph) is mostly consistent but shows the
416///    CONTINUE and HALT keys to have been removed and LINE FEED UP
417///    and LINE FEED DOWN have been added.
418/// 3. Page 8 of "The Lincoln Writer" (Lincoln Lab Group Report 51-8)
419///    says: The lower case keyboard was almost standard (our capital
420///    letters were put on the lower case).
421pub fn lincoln_char_to_described_char(
422    lin_ch: Unsigned6Bit,
423    state: &mut LincolnState,
424) -> Option<DescribedChar> {
425    lincoln_writer_state_update(lin_ch, state);
426    let advance: bool = lin_ch != 0o12 && lin_ch != 0o13;
427    let by_case = |lower, upper: char| -> Option<char> { Some(bycase(lower, upper, *state)) };
428
429    // It's more important for the cases to be in numerical order than
430    // it is to avoid identical bodies.
431    #[allow(clippy::match_same_arms)]
432    let base_char: Option<char> = match u8::from(lin_ch) {
433        0o00 => by_case('0', '☛'), // \U261B, black hand pointing right
434        0o01 => by_case('1', 'Σ'), // \U03A3, Greek capital letter Sigma
435        0o02 => by_case('2', '|'),
436        0o03 => by_case('3', '‖'), // \U2016, double vertical line
437        0o04 => by_case('4', '/'),
438        0o05 => by_case('5', '×'), // multiplication sign (U+00D7)
439        0o06 => by_case('6', '#'),
440        0o07 => by_case('7', '→'), // rightwards arrow (U+2192)
441        0o10 => by_case('8', '<'),
442        0o11 => by_case('9', '>'),
443        0o12 => {
444            // These characters do not advance the carriage.  Hence we
445            // translate the lower-case 0o12 into Unicode 'combining
446            // low line' rather than underscore.
447            by_case(
448                '\u{0332}', // combining low line
449                '\u{0305}', // combining overline
450            )
451        }
452        0o13 => {
453            // These characters do not advance the carriage.
454            by_case(
455                '\u{20DD}', // combining enclosing circle
456                '\u{20DE}', // combining enclosing square
457            )
458        }
459        0o14..=0o17 => return Some(unprintable(lin_ch, *state)), // "READ IN", "BEGIN", "NO", "YES"
460        0o20 => by_case('A', 'n'),
461        0o21 => by_case('B', '⊂'), // Subset of (U+2282)
462        0o22 => by_case('C', '∨'), // Logical or (U+2228)
463        0o23 => by_case('D', 'q'),
464        0o24 => by_case('E', 'γ'), // Greek small letter gamma (U+03B3)
465        0o25 => by_case('F', 't'),
466        0o26 => by_case('G', 'w'),
467        0o27 => by_case('H', 'x'),
468        0o30 => by_case('I', 'i'),
469        0o31 => by_case('J', 'y'),
470        0o32 => by_case('K', 'z'),
471        0o33 => by_case('L', '?'),
472        0o34 => by_case('M', '∪'), // Union, U+222A
473        0o35 => by_case('N', '∩'), // Intersection, U+2229
474        0o36 => by_case('O', 'j'),
475        0o37 => by_case('P', 'k'),
476        0o40 => by_case('Q', 'α'), // Greek small letter alpha, U+03B1
477        0o41 => by_case('R', 'Δ'), // Greek capital delta, U+0394
478        0o42 => by_case('S', 'p'),
479        // Previously we thought that the right-hand character was ∈
480        // (Element of, U+2208), but seeing the greek letters grouped
481        // in section 6-2.3 ("RULES FOR SYMEX FORMATION") shows that
482        // this is a greek letter, epsilon.
483        0o43 => by_case('T', 'ε'), // Epsilon
484        0o44 => by_case('U', 'h'),
485        0o45 => by_case('V', '⊃'), // Superset of, U+2283
486        0o46 => by_case('W', 'β'), // Greek beta symbol, U+03B2
487        0o47 => by_case('X', '∧'), // Logical And U+2227
488        0o50 => by_case('Y', 'λ'), // Greek small letter lambda, U+03BB
489        0o51 => by_case('Z', '~'),
490        0o52 => by_case('(', '{'),
491        0o53 => by_case(')', '}'),
492        0o54 => by_case('+', '≡'), // Identical to, U+2261
493        0o55 => by_case('-', '='),
494        0o56 => by_case(',', '\u{0027}'), // Single apostrophe, U+0027
495        0o57 => by_case('.', '*'),
496        0o60 => {
497            // Despite the state change, on input only the 060 is
498            // emitted by the Lincoln Writer.  Carriage Return also
499            // advances the paper (i.e. performs a line feed).
500            Some('\r') // state change was already done.
501        }
502        0o61 => Some('\t'),
503        0o62 => Some('\u{0008}'), // backspace, U+0008
504        0o63 => None,             // COLOR BLACK; state change already done
505        0o64 => None,             // SUPER; state change already done
506        0o65 => None,             // NORMAL; state change already done
507        0o66 => None,             // SUB; state change already done
508        0o67 => None,             // COLOR RED; state change already done
509        0o70 => Some(' '),        // space
510        0o71 => return Some(unprintable(lin_ch, *state)), // WORD EXAM
511        0o72 => Some('\n'),       // LINE FEED UP
512        0o73 => Some('\u{008D}'), // LINE FEED DOWN
513        0o74 => None,             // LOWER CASE; state change already done
514        0o75 => None,             // UPPER CASE; state change already done
515        0o76 => return Some(unprintable(lin_ch, *state)), // STOP
516        0o77 => {
517            // Supposedly NULLIFY.  It's used on paper tape as a way
518            // to delete a character. Punching out all the bit holes
519            // changes the code to 0o77 and applications supposedly
520            // ignore these characters on the basis that the user has
521            // deleted them.
522            //
523            // For example suppose the user presses 'Q' followed by
524            // 'DELETE'.
525            //
526            // In off-line mode, where the LW is being used only to
527            // prepare a paper tape the TX-2 doesn't directly see the
528            // codes.  The tape will be punched with code 0o40
529            // (representing 'Q') and then the same location will be
530            // re-punched with 0o77 (effectively deleting the 'Q').
531            // Later when the paper tape is read, the only code the
532            // machine will see is the 0o77 (assuming that there was
533            // no previous upper/lower case change code).
534            //
535            // In on-line mode the TX-2 will see two codes, 0o40
536            // followed by 0o77; the Lincoln Writer cannot "un-send"
537            // the 0o40. This is the same behaviour as modern
538            // computers have for DELETE.  Therefore we map this code
539            // to ASCII DEL.
540            Some('\u{007F}')
541        }
542        _ => unreachable!("All Unsigned6Bit values should have been handled"),
543    };
544
545    if let Some(base) = base_char {
546        let display = match state.script {
547            Script::Normal => Some(base),
548            Script::Sub => subscript_char(base).ok(),
549            Script::Super => superscript_char(base).ok(),
550        };
551        // Non-carriage-advancing characters don't strictly match the
552        // key label, because we represent them as combining
553        // characters and so there's a space in the key label too.
554        let label_matches_unicode = if !advance {
555            false
556        } else {
557            #[allow(clippy::match_same_arms)]
558            match display {
559                None => false,
560                Some(' ') => {
561                    // Here the mapping is to ' ' but in the keyboard
562                    // implementation, the space bar's label is the
563                    // zero-length string.
564                    false
565                }
566                Some('\n' | '\r' | '\t' | '\u{0008}' | '\u{008D}' | '\u{007F}') => false,
567                Some('☛') => {
568                    // On the keyboard we label this with '☞' (Unicode
569                    // U+261E) instead of '☛'(U+261B) because the
570                    // outline looks more readable on the drawn
571                    // keyboard.  So these don't match.
572                    false
573                }
574                Some(_) => true,
575            }
576        };
577        Some(DescribedChar {
578            base_char: LincolnChar::UnicodeBaseChar(base),
579            unicode_representation: display,
580            attributes: *state,
581            advance,
582            label_matches_unicode,
583        })
584    } else {
585        None
586    }
587}
588
/// The reason `lincoln_to_unicode_strict` could not convert its
/// input.  In both variants the first field is the input code which
/// could not be converted and the second is the base character that
/// code denotes.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LincolnToUnicodeStrictConversionFailure {
    /// The character was to be printed in subscript but no Unicode
    /// subscript form of it is known.
    CannotSubscript(Unsigned6Bit, LincolnChar),
    /// The character was to be printed in superscript but no Unicode
    /// superscript form of it is known.
    CannotSuperscript(Unsigned6Bit, LincolnChar),
}
594
595/// Convert a stream of Lincoln Writer codes to a Unicode string.
596/// Lincoln Writer codes are 6 bits, and these are assumed to be in
597/// the lower 6 bits of the input values.
598///
599/// # Errors
600///
601/// If an input character is printable on the Lincoln Writer
602/// (i.e. would make a mark on the paper) but has no Unicode
603/// representation (e.g. because the LW is in superscript mode and
604/// there is no Unicode superscript character to represent the
605/// incoming LW character) then
606/// `Err(LincolnToUnicodeStrictConversionFailure)` is returned.
607pub fn lincoln_to_unicode_strict(
608    input: &[Unsigned6Bit],
609) -> Result<String, LincolnToUnicodeStrictConversionFailure> {
610    let mut result = String::with_capacity(input.len());
611    let mut state: LincolnState = LincolnState::default();
612    for byte in input {
613        match lincoln_char_to_described_char(*byte, &mut state) {
614            Some(DescribedChar {
615                base_char: LincolnChar::Unprintable(_),
616                ..
617            }) => {
618                // Codes like "YES" are handled here.  When printed on
619                // the Lincoln Writer, no character is printed (though
620                // some time is taken to not print it).
621                //
622                // We do nothing (i.e. generate no error and no output
623                // character).
624            }
625            Some(DescribedChar {
626                base_char: _,
627                unicode_representation: Some(display),
628                attributes: _,
629                advance: _,
630                label_matches_unicode: _,
631            }) => {
632                result.push(display);
633            }
634            Some(DescribedChar {
635                base_char,
636                unicode_representation: None,
637                attributes,
638                advance: _,
639                label_matches_unicode: _,
640            }) => match attributes.script {
641                Script::Normal => unreachable!(),
642                Script::Sub => {
643                    return Err(LincolnToUnicodeStrictConversionFailure::CannotSubscript(
644                        *byte, base_char,
645                    ));
646                }
647                Script::Super => {
648                    return Err(LincolnToUnicodeStrictConversionFailure::CannotSuperscript(
649                        *byte, base_char,
650                    ));
651                }
652            },
653            None => (),
654        }
655    }
656    Ok(result)
657}
658
/// A Lincoln Writer code together with the Lincoln Writer state
/// recorded for it when the Unicode-to-Lincoln mapping was built.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct LincChar {
    /// The state (script, case, colour) associated with the code.
    state: LincolnState,
    /// The 6-bit Lincoln Writer code.
    value: Unsigned6Bit,
}
664
665pub struct UnicodeToLincolnMapping {
666    m: HashMap<char, LincChar>,
667}
668
/// The reason a Unicode string could not be converted to Lincoln
/// Writer codes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UnicodeToLincolnConversionFailure {
    /// The given Unicode character has no Lincoln Writer equivalent.
    NoMapping(char),
}

impl Display for UnicodeToLincolnConversionFailure {
    /// Formats a human-readable message naming the unmapped character.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoMapping(ch) => {
                write!(
                    f,
                    "there is no mapping for '{ch}' from Unicode to Lincoln Writer character set",
                )
            }
        }
    }
}

// Implemented so that this failure can be used wherever a
// `std::error::Error` is expected (for example boxed as
// `Box<dyn Error>`), like the other error types in this module.
impl Error for UnicodeToLincolnConversionFailure {}
686
687impl UnicodeToLincolnMapping {
688    #[must_use]
689    pub fn new() -> UnicodeToLincolnMapping {
690        let mut m: HashMap<char, LincChar> = HashMap::new();
691        for script in [Script::Normal, Script::Super, Script::Sub] {
692            for case in [LwKeyboardCase::Lower, LwKeyboardCase::Upper] {
693                for value in 0..=0o77 {
694                    if let Ok(ch) = Unsigned6Bit::try_from(value) {
695                        let mut state = LincolnState {
696                            script,
697                            case,
698                            colour: Colour::Black,
699                        };
700                        if let Some(DescribedChar {
701                            base_char: _,
702                            unicode_representation: Some(display),
703                            attributes: _,
704                            advance: _,
705                            label_matches_unicode: _,
706                        }) = lincoln_char_to_described_char(ch, &mut state)
707                        {
708                            m.insert(display, LincChar { state, value: ch });
709                        }
710                    } else {
711                        continue;
712                    }
713                }
714            }
715        }
716        UnicodeToLincolnMapping { m }
717    }
718
719    /// Convert a Unicode string to a sequence of Lincoln Writer codes.
720    ///
721    /// # Errors
722    ///
723    /// `Err(UnicodeToLincolnconversionfailure)` is returned when one
724    /// of the Unicode characters in the input cannot be converted to
725    /// a Lincoln Writer code.
726    pub fn to_lincoln(
727        &self,
728        s: &str,
729    ) -> Result<Vec<Unsigned6Bit>, UnicodeToLincolnConversionFailure> {
730        let mut result: Vec<Unsigned6Bit> = Vec::with_capacity(s.len());
731        let mut current_case: Option<LwKeyboardCase> = None;
732        let mut current_script: Option<Script> = None;
733
734        for ch in s.chars() {
735            match self.m.get(&ch) {
736                None => {
737                    return Err(UnicodeToLincolnConversionFailure::NoMapping(ch));
738                }
739                Some(lch) => {
740                    if Some(lch.state.case) == current_case {
741                        // Nothing to do
742                    } else {
743                        result.push(match lch.state.case {
744                            LwKeyboardCase::Upper => u6!(0o75),
745                            LwKeyboardCase::Lower => u6!(0o74),
746                        });
747                        current_case = Some(lch.state.case);
748                    }
749
750                    if Some(lch.state.script) == current_script {
751                        // Nothing to do
752                    } else {
753                        result.push(match lch.state.script {
754                            Script::Super => u6!(0o64),
755                            Script::Normal => u6!(0o65),
756                            Script::Sub => u6!(0o66),
757                        });
758                        current_script = Some(lch.state.script);
759                    }
760
761                    result.push(lch.value);
762                }
763            }
764        }
765        Ok(result)
766    }
767}
768
769impl Default for UnicodeToLincolnMapping {
770    fn default() -> UnicodeToLincolnMapping {
771        UnicodeToLincolnMapping::new()
772    }
773}