assembler/
parser.rs

1//! Turn a sequence of input tokens into a data structure representing
2//! it.
3//!
4//! This data structure isn't a tree, because the logical
5//! structure of a TX-2 assembly-language program isn't tree-like.
6use std::{
7    collections::BTreeMap,
8    collections::BTreeSet,
9    fmt::Debug,
10    ops::{BitOr, Range, Shl},
11};
12
13pub(crate) mod helpers;
14mod symex;
15#[cfg(test)]
16mod tests;
17
18use chumsky::error::Rich;
19use chumsky::extension::v1::Ext;
20use chumsky::extension::v1::ExtParser;
21use chumsky::extra::{Full, ParserExtra};
22use chumsky::input::InputRef;
23use chumsky::input::{Emitter, MapExtra, Stream, ValueInput};
24
25use chumsky::Boxed;
26use chumsky::Parser;
27use chumsky::prelude::{Input, IterParser, Recursive, SimpleSpan, choice, just, one_of, recursive};
28use chumsky::select;
29
30use crate::collections::OneOrMore;
31
32use super::ast::{
33    ArithmeticExpression, Atom, CommaDelimitedFragment, Commas, CommasOrInstruction, ConfigValue,
34    Equality, EqualityValue, FragmentWithHold, HoldBit, InstructionFragment, LiteralValue,
35    Operator, Origin, RegisterContaining, RegistersContaining, SignedAtom, SpannedSymbolOrLiteral,
36    SymbolOrLiteral, Tag, TaggedProgramInstruction, UntaggedProgramInstruction,
37};
38use super::lexer::{self};
39use super::manuscript::{
40    MacroBodyLine, MacroDefinition, MacroDummyParameters, MacroInvocation, MacroParameter,
41    MacroParameterBindings, MacroParameterValue, ManuscriptLine, ManuscriptMetaCommand, SourceFile,
42    manuscript_lines_to_source_file,
43};
44use super::span::{Span, Spanned, span};
45use super::state::{NumeralMode, State};
46use super::symbol::SymbolName;
47use base::charset::Script;
48use base::prelude::*;
49use helpers::Sign;
50use symex::SymexSyllableRule;
51
/// The chumsky "extra" configuration shared by the parsers in this
/// module: errors are `Rich` errors over lexer tokens, the parser
/// state is `State`, and the context type is `()`.
pub(crate) type ExtraWithoutContext<'a> = Full<Rich<'a, lexer::Token>, State<'a>, ()>;
53
54use lexer::Token as Tok;
55
56fn maybe_sign<'a, I>(
57    script_required: Script,
58) -> impl Parser<'a, I, Option<(Sign, Span)>, ExtraWithoutContext<'a>> + Clone
59where
60    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
61{
62    choice((
63        just(Tok::Plus(script_required)).to(Sign::Plus),
64        just(Tok::Minus(script_required)).to(Sign::Minus),
65    ))
66    .map_with(|maybe_sign, extra| (maybe_sign, extra.span()))
67    .or_not()
68}
69
70#[derive(Debug, PartialEq, Eq, Clone)]
71enum BitDesignatorValidation {
72    Good(LiteralValue),
73    Suspect(u8, u8, LiteralValue),
74}
75
76fn make_bit_designator_literal(
77    script: Script,
78    quarter: u8,
79    bitnum: u8,
80    span: Span,
81) -> BitDesignatorValidation {
82    fn build(q: u64, b: u64) -> Unsigned36Bit {
83        // When used as a subscript, the quarter number goes into bits
84        // 3.6-3.5 (bits 23,22).  The bit number goes into bits
85        // 3.4-3.1 (bits 21-18).  However, subscript values are
86        // shifted left by 18 bits.  Meaning, if this is used as a
87        // normal-script value, it should not be shifted.
88        let qmod4: u64 = q % 4_u64;
89        Unsigned36Bit::ZERO.bitor(qmod4.shl(4_u32).bitor(b))
90    }
91    // Apparently-invalid bit designators should still be accepted.
92    // See for example the description of the SKM instruction (in
93    // chapter 3 of the Users Handbook which explains what the machine
94    // does with invalid bit designators.  See also the example in the
95    // table in section 6-2.4 of the Users Handbook.
96    //
97    // So we arrange to issue a warning message for this case.
98    let value = LiteralValue::from((span, script, build(quarter.into(), bitnum.into())));
99    match (quarter, bitnum) {
100        (1..=4, 1..=9) | (4, 10) => BitDesignatorValidation::Good(value),
101        _ => BitDesignatorValidation::Suspect(quarter, bitnum, value),
102    }
103}
104
105// I'm not really defining my own type here, this is just for
106// abbreviation purposes.
107type MyEmitter<'a, I> = Emitter<
108    <chumsky::extra::Full<chumsky::error::Rich<'a, lexer::Token>, State<'a>, ()> as ParserExtra<
109        'a,
110        I,
111    >>::Error,
112>;
113
114fn warn_bad_bitpos<'src, I>(
115    validated: BitDesignatorValidation,
116    extra: &mut MapExtra<'src, '_, I, ExtraWithoutContext<'src>>,
117    emitter: &mut MyEmitter<'src, I>,
118) -> LiteralValue
119where
120    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
121{
122    match validated {
123        // This is a warning message only, because it's
124        // allowed to specify a nonexistent bit position (see
125        // description of SKM instruction).
126        BitDesignatorValidation::Suspect(q, b, literal) => {
127            emitter.emit(Rich::custom(
128                extra.span(),
129                format!("bit position {q}\u{00B7}{b} does not exist"),
130            ));
131            literal
132        }
133        BitDesignatorValidation::Good(literal) => literal,
134    }
135}
136
137fn bit_selector<'a, I>(
138    script_required: Script,
139) -> impl Parser<'a, I, LiteralValue, ExtraWithoutContext<'a>> + Clone
140where
141    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
142{
143    select! {
144        Tok::BitPosition(script, quarter, bits) if script == script_required => (quarter, bits)
145    }
146    .try_map_with(move |(quarter, bit), extra| {
147        // Bit designators are always in decimal.  They end up in the "j bits" of the instruction word (bits 3.6 to 3.1).
148        // This is described in the Users Handbook on page 3-34 (in the description of the SKM instruction)
149        match quarter.as_str().parse::<u8>() {
150            Err(_) => Err(Rich::custom(
151                extra.span(),
152                format!("quarter {quarter} is not a valid decimal number"),
153            )),
154            Ok(q) => match bit.as_str().parse::<u8>() {
155                Ok(bit) => Ok(make_bit_designator_literal(
156                    script_required,
157                    q,
158                    bit,
159                    extra.span(),
160                )),
161                Err(_) => Err(Rich::custom(
162                    extra.span(),
163                    format!("bit position {bit} is not a valid decimal number"),
164                )),
165            },
166        }
167    })
168    .validate(warn_bad_bitpos)
169}
170
171fn literal<'a, I>(
172    script_required: Script,
173) -> impl Parser<'a, I, LiteralValue, ExtraWithoutContext<'a>> + Clone
174where
175    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
176{
177    let plain_literal = {
178        let digits = select! {
179            Tok::Digits(script, n) if script == script_required => n,
180        };
181
182        digits.try_map_with(move |digits_token_payload, extra| {
183            let state: &State = extra.state();
184            let mode: NumeralMode = state.numeral_mode;
185            match digits_token_payload.make_num(mode) {
186                Ok(value) => Ok(LiteralValue::from((extra.span(), script_required, value))),
187                Err(e) => Err(Rich::custom(extra.span(), e.to_string())),
188            }
189        })
190    };
191    choice((bit_selector(script_required), plain_literal)).labelled("numeric literal")
192}
193
194fn here<'a, I>(
195    script_required: Script,
196) -> impl Parser<'a, I, SymbolOrLiteral, ExtraWithoutContext<'a>> + Clone
197where
198    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
199{
200    select! {
201        Tok::Hash(script) if script == script_required => (),
202    }
203    .map_with(move |(), extra| SymbolOrLiteral::Here(script_required, extra.span()))
204}
205
206fn opcode_code(s: &str) -> Option<(Unsigned5Bit, Unsigned6Bit)> {
207    match s {
208        "IOS" => Some((u5!(0o00), u6!(0o04))),
209        "JMP" => Some((u5!(0o00), u6!(0o05))),
210        "BRC" => Some((u5!(0o01), u6!(0o05))),
211        "JPS" => Some((u5!(0o02), u6!(0o05))),
212        "BRS" => Some((u5!(0o03), u6!(0o05))),
213        "JPQ" => Some((u5!(0o14), u6!(0o05))),
214        "BPQ" => Some((u5!(0o15), u6!(0o05))),
215        "JES" => Some((u5!(0o16), u6!(0o05))),
216        "JPD" => Some((u5!(0o20), u6!(0o05))),
217        "BRD" => Some((u5!(0o21), u6!(0o05))),
218        "JDS" => Some((u5!(0o22), u6!(0o05))),
219        "BDS" => Some((u5!(0o23), u6!(0o05))),
220        "JPX" => Some((u5!(0o00), u6!(0o06))),
221        "JNX" => Some((u5!(0o00), u6!(0o07))),
222        "AUX" => Some((u5!(0o00), u6!(0o10))),
223        "RSX" => Some((u5!(0o00), u6!(0o11))),
224        "SKX" | "REX" | "SEX" => Some((u5!(0o00), u6!(0o12))),
225        "INX" => Some((u5!(0o02), u6!(0o12))),
226        "DEX" => Some((u5!(0o03), u6!(0o12))),
227        "SXD" => Some((u5!(0o04), u6!(0o12))),
228        "SXL" => Some((u5!(0o06), u6!(0o12))),
229        "SXG" => Some((u5!(0o07), u6!(0o12))),
230        "RXF" => Some((u5!(0o10), u6!(0o12))),
231        "RXD" => Some((u5!(0o20), u6!(0o12))),
232        "RFD" => Some((u5!(0o30), u6!(0o12))),
233        "EXX" => Some((u5!(0o00), u6!(0o14))),
234        "ADX" => Some((u5!(0o00), u6!(0o15))),
235        "DPX" => Some((u5!(0o00), u6!(0o16))),
236        "SKM" => Some((u5!(0o00), u6!(0o17))),
237        "MKC" => Some((u5!(0o01), u6!(0o17))),
238        "MKZ" => Some((u5!(0o02), u6!(0o17))),
239        "MKN" => Some((u5!(0o03), u6!(0o17))),
240        "SKU" => Some((u5!(0o10), u6!(0o17))),
241        "SUC" => Some((u5!(0o11), u6!(0o17))),
242        "SUZ" => Some((u5!(0o12), u6!(0o17))),
243        "SUN" => Some((u5!(0o13), u6!(0o17))),
244        "SKZ" => Some((u5!(0o20), u6!(0o17))),
245        "SZC" => Some((u5!(0o21), u6!(0o17))),
246        "SZZ" => Some((u5!(0o22), u6!(0o17))),
247        "SZN" => Some((u5!(0o23), u6!(0o17))),
248        "SKN" => Some((u5!(0o30), u6!(0o17))),
249        "SNC" => Some((u5!(0o31), u6!(0o17))),
250        "SNZ" => Some((u5!(0o32), u6!(0o17))),
251        "SNN" => Some((u5!(0o33), u6!(0o17))),
252        "CYR" => Some((u5!(0o04), u6!(0o17))),
253        "MCR" => Some((u5!(0o05), u6!(0o17))),
254        "MZR" => Some((u5!(0o06), u6!(0o17))),
255        "MNR" => Some((u5!(0o07), u6!(0o17))),
256        "SNR" => Some((u5!(0o34), u6!(0o17))),
257        "SZR" => Some((u5!(0o24), u6!(0o17))),
258        "SUR" => Some((u5!(0o14), u6!(0o17))),
259        "LDE" => Some((u5!(0o00), u6!(0o20))),
260        "SPF" => Some((u5!(0o00), u6!(0o21))),
261        "SPG" => Some((u5!(0o00), u6!(0o22))),
262        "LDA" => Some((u5!(0o00), u6!(0o24))),
263        "LDB" => Some((u5!(0o00), u6!(0o25))),
264        "LDC" => Some((u5!(0o00), u6!(0o26))),
265        "LDD" => Some((u5!(0o00), u6!(0o27))),
266        "STE" => Some((u5!(0o00), u6!(0o30))),
267        "FLF" => Some((u5!(0o00), u6!(0o31))),
268        "FLG" => Some((u5!(0o00), u6!(0o32))),
269        "STA" => Some((u5!(0o00), u6!(0o34))),
270        "STB" => Some((u5!(0o00), u6!(0o35))),
271        "STC" => Some((u5!(0o00), u6!(0o36))),
272        "STD" => Some((u5!(0o00), u6!(0o37))),
273        "ITE" => Some((u5!(0o00), u6!(0o40))),
274        "ITA" => Some((u5!(0o00), u6!(0o41))),
275        "UNA" => Some((u5!(0o00), u6!(0o42))),
276        "SED" => Some((u5!(0o00), u6!(0o43))),
277        "JOV" => Some((u5!(0o00), u6!(0o45))),
278        "JPA" => Some((u5!(0o00), u6!(0o46))),
279        "JNA" => Some((u5!(0o00), u6!(0o47))),
280        "EXA" => Some((u5!(0o00), u6!(0o54))),
281        "INS" => Some((u5!(0o00), u6!(0o55))),
282        "COM" => Some((u5!(0o00), u6!(0o56))),
283        "TSD" => Some((u5!(0o00), u6!(0o57))),
284        "CYA" => Some((u5!(0o00), u6!(0o60))),
285        "CYB" => Some((u5!(0o00), u6!(0o61))),
286        "CAB" => Some((u5!(0o00), u6!(0o62))),
287        "NOA" => Some((u5!(0o00), u6!(0o64))),
288        "DSA" => Some((u5!(0o00), u6!(0o65))),
289        "NAB" => Some((u5!(0o00), u6!(0o66))),
290        "ADD" => Some((u5!(0o00), u6!(0o67))),
291        "SCA" => Some((u5!(0o00), u6!(0o70))),
292        "SCB" => Some((u5!(0o00), u6!(0o71))),
293        "SAB" => Some((u5!(0o00), u6!(0o72))),
294        "TLY" => Some((u5!(0o00), u6!(0o74))),
295        "DIV" => Some((u5!(0o00), u6!(0o75))),
296        "MUL" => Some((u5!(0o00), u6!(0o76))),
297        "SUB" => Some((u5!(0o00), u6!(0o77))),
298        _ => None,
299    }
300}
301
302pub(super) fn symbol_or_literal<'a, I>(
303    script_required: Script,
304) -> impl Parser<'a, I, SymbolOrLiteral, ExtraWithoutContext<'a>> + Clone
305where
306    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
307{
308    choice((
309        literal(script_required).map(SymbolOrLiteral::Literal),
310        symex::symex_syllable(script_required).map_with(move |name, extra| {
311            SymbolOrLiteral::Symbol(script_required, SymbolName::from(name), extra.span())
312        }),
313    ))
314    .labelled(match script_required {
315        Script::Super => "superscript single-syllable symbol or literal",
316        Script::Normal => "single-syllable symbol or literal",
317        Script::Sub => "subscript single-syllable symbol or literal",
318    })
319}
320
321fn opcode_to_literal(code: Unsigned6Bit, cfgbits: Unsigned5Bit, span: Span) -> LiteralValue {
322    let bits = Unsigned36Bit::ZERO
323        .bitor(u64::from(code).shl(24))
324        .bitor(u64::from(cfgbits).shl(30))
325        .bitor(helpers::opcode_auto_hold_bit(code));
326    LiteralValue::from((span, Script::Normal, bits))
327}
328
329pub(super) fn opcode<'a, I>() -> impl Parser<'a, I, LiteralValue, ExtraWithoutContext<'a>> + Clone
330where
331    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
332{
333    symex::symex_syllable(Script::Normal)
334        .filter(|mnemonic| opcode_code(mnemonic).is_some())
335        .try_map_with(|mnemonic, extra| match opcode_code(mnemonic.as_str()) {
336            Some((cfgbits, code)) => Ok(opcode_to_literal(code, cfgbits, extra.span())),
337            None => Err(Rich::custom(
338                extra.span(),
339                format!("'{mnemonic}' is not an opcode mnemonic"),
340            )),
341        })
342        .labelled("opcode")
343}
344
/// Parse a symex in the script `script_required`, yielding its
/// `SymbolName`.
///
/// This simply delegates to `symex::parse_symex`, passing on `rule`
/// and `script_required` unchanged.
fn named_symbol<'a, I>(
    rule: SymexSyllableRule,
    script_required: Script,
) -> impl Parser<'a, I, SymbolName, ExtraWithoutContext<'a>> + Clone
where
    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
{
    symex::parse_symex(rule, script_required)
}
354
355pub(super) fn operator<'a, I>(
356    script_required: Script,
357) -> impl Parser<'a, I, Operator, ExtraWithoutContext<'a>> + Clone
358where
359    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
360{
361    select! {
362        // Solidus ("/") is used for divide.  See section 6-2.7
363        // "Word Assembly" for details.
364        Tok::Solidus(script) if script_required == script => Operator::Divide,
365        Tok::Plus(Script::Normal) => Operator::Add,
366        Tok::Times(got) if got == script_required => Operator::Multiply,
367        Tok::LogicalOr(got) if got == script_required => Operator::LogicalOr,
368        Tok::LogicalAnd(got) if got == script_required => Operator::LogicalAnd,
369        Tok::Minus(got) if script_required == got => Operator::Subtract,
370        Tok::Plus(got) if script_required == got => Operator::Add,
371    }
372    .labelled("arithmetic operator")
373}
374
375fn asterisk_indirection_fragment<'srcbody, I>()
376-> impl Parser<'srcbody, I, InstructionFragment, ExtraWithoutContext<'srcbody>> + Clone
377where
378    I: Input<'srcbody, Token = Tok, Span = Span> + ValueInput<'srcbody>,
379{
380    just(Tok::Asterisk(Script::Normal))
381        .map_with(|_, extra| InstructionFragment::DeferredAddressing(extra.span()))
382}
383
384/// The pipe construct is described in section 6-2.8 "SPECIAL SYMBOLS"
385/// of the Users Handbook.
386///
387/// "ADXₚ|ₜQ" should be equivalent to "ADXₚ{Qₜ}*".  So during
388/// evaluation we will need to generate an RC-word containing Qₜ.
389fn make_pipe_construct(
390    (p, (t, (q, q_span))): (
391        SpannedSymbolOrLiteral,
392        (SpannedSymbolOrLiteral, (InstructionFragment, Span)),
393    ),
394) -> InstructionFragment {
395    // The variable names here are taken from the example in the
396    // documentation comment.
397    let tqspan = span(t.span.start..q_span.end);
398
399    let rc_word_value: RegisterContaining = RegisterContaining::from(TaggedProgramInstruction {
400        span: tqspan,
401        tags: Vec::new(),
402        instruction: UntaggedProgramInstruction::from(OneOrMore::with_tail(
403            CommaDelimitedFragment {
404                span: q_span,
405                holdbit: HoldBit::Unspecified,
406                leading_commas: None,
407                fragment: q,
408                trailing_commas: None,
409            },
410            vec![CommaDelimitedFragment {
411                span: t.span,
412                leading_commas: None,
413                holdbit: HoldBit::Unspecified,
414                fragment: InstructionFragment::Arithmetic(ArithmeticExpression::from(Atom::from(
415                    t.item,
416                ))),
417                trailing_commas: None,
418            }],
419        )),
420    });
421    InstructionFragment::PipeConstruct {
422        index: p,
423        rc_word_span: tqspan,
424        rc_word_value,
425    }
426}
427
428/// Macro terminators are described in section 6-4.5 of the TX-2 User Handbook.
429fn macro_terminator<'a, I>() -> impl Parser<'a, I, Tok, ExtraWithoutContext<'a>>
430where
431    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
432{
433    // This list of 16 allowed terminators is exhaustive, see section
434    // 6-4.5 of the TX-2 User Handbook.
435    //
436    // ☛ , = →  | ⊃ ≡ ~ < > ∩ ∪ / × ∨ ∧
437    //
438    // The second symbol, in my scanned copy of the Users Handbook
439    // (page 6-31 of the Nov 1963 Users Handbook), looks like either a
440    // comma or a dot/full-stop/period.  Since a dot is valid in a
441    // symex name, and because the symbol seems to be taller than it
442    // is wide, I'm going to assume it is a comma.
443    //
444    // They are actually hard to distinguish in the copy of the Users
445    // Handbook I have.  But, looking at page 011 of Leonard
446    // Kleinrock's listing for his network simulator, the `HP OS`
447    // macro is definitely using as a separator a symbol that lives on
448    // the line.  If you look a little further below on the same
449    // page, the third instruction in the body of the `MV MX` macro
450    // body contains both a dot and a comma.  The dot is definitely
451    // above the line and looks rounder.  So I conclude that the
452    // separator character is a comma.
453    choice((
454        just(Tok::Hand(Script::Normal)),
455        just(Tok::Comma(Script::Normal)),
456        just(Tok::Equals(Script::Normal)),
457        just(Tok::Arrow(Script::Normal)),
458        just(Tok::Pipe(Script::Normal)),
459        just(Tok::ProperSuperset(Script::Normal)),
460        just(Tok::IdenticalTo(Script::Normal)),
461        just(Tok::Tilde(Script::Normal)),
462        just(Tok::LessThan(Script::Normal)),
463        just(Tok::GreaterThan(Script::Normal)),
464        just(Tok::Intersection(Script::Normal)),
465        just(Tok::Union(Script::Normal)),
466        just(Tok::Solidus(Script::Normal)),
467        just(Tok::Times(Script::Normal)),
468        just(Tok::LogicalOr(Script::Normal)),
469        just(Tok::LogicalAnd(Script::Normal)),
470    ))
471    .labelled("macro terminator")
472}
473
474// Exposed for testing.
475fn macro_definition_dummy_parameter<'a, I>()
476-> impl Parser<'a, I, MacroParameter, ExtraWithoutContext<'a>>
477where
478    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
479{
480    // The TX-2 Users Handbook section 6-4.4 ("DUMMY PARAMETERS")
481    // doesn't disallow spaces in macro argument names.
482    (macro_terminator().then(named_symbol(SymexSyllableRule::Multiple, Script::Normal))).map_with(
483        |(terminator, symbol), extra| MacroParameter {
484            name: symbol,
485            span: extra.span(),
486            preceding_terminator: terminator,
487        },
488    )
489}
490
491fn macro_definition_dummy_parameters<'a, I>()
492-> impl Parser<'a, I, MacroDummyParameters, ExtraWithoutContext<'a>>
493where
494    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
495{
496    choice((
497        macro_definition_dummy_parameter()
498            .repeated()
499            .at_least(1)
500            .collect::<Vec<_>>()
501            .map(MacroDummyParameters::OneOrMore),
502        macro_terminator().map(MacroDummyParameters::Zero),
503    ))
504}
505
506/// Macros are described in section 6-4 of the TX-2 User Handbook.
507fn macro_definition<'a, 'b, I>(
508    grammar: &Grammar<'a, 'b, I>,
509) -> impl Parser<'a, I, MacroDefinition, ExtraWithoutContext<'a>> + use<'a, 'b, I>
510where
511    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
512{
513    named_metacommand(Metacommand::DefineMacro)
514        .ignore_then(
515            named_symbol(SymexSyllableRule::Multiple, Script::Normal).labelled("macro name"), // the macro's name (# is not allowed)
516        )
517        .then(macro_definition_dummy_parameters())
518        .then_ignore(end_of_line())
519        .then(
520            (macro_body_line(grammar).then_ignore(end_of_line()))
521                .repeated()
522                .collect()
523                .labelled("macro body"),
524        )
525        .then_ignore(named_metacommand(Metacommand::EndMacroDefinition))
526        // We don't parse end-of-line here because all metacommands are supposed
527        // to be followed by end-of-line.
528        .map_with(|((name, args), body), extra| {
529            let definition = MacroDefinition {
530                name,
531                params: args,
532                body,
533                span: extra.span(),
534            };
535            extra.state().define_macro(definition.clone());
536            definition
537        })
538}
539
540fn macro_body_line<'a, 'b, I>(
541    grammar: &Grammar<'a, 'b, I>,
542) -> impl Parser<'a, I, MacroBodyLine, ExtraWithoutContext<'a>> + use<'a, 'b, I>
543where
544    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
545{
546    choice((
547        macro_invocation().map(MacroBodyLine::Expansion),
548        grammar.assignment.clone().map(MacroBodyLine::Equality),
549        grammar
550            .tagged_program_instruction
551            .clone()
552            .map(MacroBodyLine::Instruction),
553    ))
554}
555
556fn arithmetic_expression_in_any_script_allowing_spaces<'a, I>()
557-> impl Parser<'a, I, (Span, Script, ArithmeticExpression), ExtraWithoutContext<'a>>
558where
559    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
560{
561    let g = grammar();
562    choice((
563        g.normal_arithmetic_expression_allowing_spaces
564            .map_with(|expr, extra| (extra.span(), Script::Normal, expr)),
565        g.subscript_arithmetic_expression_allowing_spaces
566            .map_with(|expr, extra| (extra.span(), Script::Sub, expr)),
567        g.superscript_arithmetic_expression_allowing_spaces
568            .map_with(|expr, extra| (extra.span(), Script::Super, expr)),
569    ))
570}
571
572fn defined_macro_name<'src, I>() -> impl Parser<'src, I, MacroDefinition, ExtraWithoutContext<'src>>
573where
574    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
575{
576    fn mapping<'a>(
577        name: &SymbolName,
578        state: &State,
579        span: Span,
580    ) -> Result<MacroDefinition, chumsky::error::Rich<'a, lexer::Token>> {
581        match state.get_macro_definition(name) {
582            None => Err(Rich::custom(span, format!("unknown macro {name}"))),
583            Some(macro_def) => Ok(macro_def.clone()),
584        }
585    }
586
587    symex::parse_symex(SymexSyllableRule::OneOnly, Script::Normal).try_map_with(|name, extra| {
588        let span: Span = extra.span();
589        let state: &State = extra.state();
590        mapping(&name, state, span)
591    })
592}
593
/// The parsed value of one macro argument: `None` when the argument
/// was omitted, otherwise the script it was written in together with
/// its arithmetic expression.
type ParsedMacroArg = Option<(Script, ArithmeticExpression)>;
595
/// Parser for a macro invocation.  The parsing logic lives in this
/// type's `ExtParser` implementation; the fields are the sub-parsers
/// which it uses.
#[derive(Clone)]
struct MacroInvocationParser<'src, I>
where
    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
{
    /// Parses one (possibly absent) macro argument, yielding its span
    /// and its value.
    expr_parser: Boxed<'src, 'src, I, (Span, ParsedMacroArg), ExtraWithoutContext<'src>>,
    /// Parses the name of an already-defined macro, yielding its
    /// definition.
    defined_macro_name_parser: Boxed<'src, 'src, I, MacroDefinition, ExtraWithoutContext<'src>>,
}
604
605impl<'src, I> Default for MacroInvocationParser<'src, I>
606where
607    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
608{
609    fn default() -> Self {
610        Self {
611            expr_parser: arithmetic_expression_in_any_script_allowing_spaces()
612                .or_not()
613                .map_with(|got, extra| match got {
614                    Some((span, script, expr)) => (span, Some((script, expr))),
615                    None => (extra.span(), None),
616                })
617                .boxed(),
618            defined_macro_name_parser: defined_macro_name().boxed(),
619        }
620    }
621}
622
/// Parse an invocation of an already-defined macro.
///
/// This wraps a default-constructed `MacroInvocationParser` in
/// chumsky's `Ext` adapter so that it can be used as a parser.
fn macro_invocation<'src, I>() -> impl Parser<'src, I, MacroInvocation, ExtraWithoutContext<'src>>
where
    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
{
    Ext(MacroInvocationParser::default())
}
629
630impl<'src, I> ExtParser<'src, I, MacroInvocation, ExtraWithoutContext<'src>>
631    for MacroInvocationParser<'src, I>
632where
633    I: Input<'src, Token = Tok, Span = Span> + ValueInput<'src>,
634{
635    fn parse(
636        &self,
637        inp: &mut InputRef<'src, '_, I, ExtraWithoutContext<'src>>,
638    ) -> Result<
639        MacroInvocation,
640        <Full<Rich<'src, Tok>, State<'src>, ()> as ParserExtra<'src, I>>::Error,
641    > {
642        let before = inp.cursor();
643        let macro_def: MacroDefinition = inp.parse(&self.defined_macro_name_parser)?;
644        let param_defs: Vec<MacroParameter> = match macro_def.params {
645            MacroDummyParameters::Zero(ref expected) => {
646                if let Some(got) = inp.next_maybe().as_deref() {
647                    if got == expected {
648                        Vec::new()
649                    } else {
650                        return Err(Rich::custom(
651                            inp.span_since(&before),
652                            format!(
653                                "expected macro name {} to be followed a terminator {} but got {}",
654                                &macro_def.name, expected, got
655                            ),
656                        ));
657                    }
658                } else {
659                    return Err(Rich::custom(
660                        inp.span_since(&before),
661                        format!(
662                            "expected macro name {} to be followed a terminator {}",
663                            &macro_def.name, expected
664                        ),
665                    ));
666                }
667            }
668            MacroDummyParameters::OneOrMore(ref params) => params.clone(),
669        };
670        let mut param_values: MacroParameterBindings = Default::default();
671        for param_def in param_defs {
672            let before = inp.cursor();
673            if let Some(got) = inp.next_maybe().as_deref() {
674                let span = inp.span_since(&before);
675                if got == &param_def.preceding_terminator {
676                    match inp.parse(&self.expr_parser)? {
677                        (span, Some((script, expr))) => {
678                            param_values.insert(
679                                param_def.name,
680                                span,
681                                Some(MacroParameterValue::Value(script, expr)),
682                            );
683                        }
684                        (span, None) => {
685                            // Record the fact that this parameter was missing.
686                            param_values.insert(param_def.name, span, None);
687                        }
688                    }
689                } else {
690                    return Err(Rich::custom(
691                        span,
692                        format!(
693                            "in invocation of macro {}, expected macro terminator {} before parameter {} but got {}",
694                            &macro_def.name, &param_def.preceding_terminator, &param_def.name, &got
695                        ),
696                    ));
697                }
698            } else {
699                let span = inp.span_since(&before);
700                return Err(Rich::custom(
701                    span,
702                    format!(
703                        "in invocation of macro {}, expected macro terminator {} before parameter {}",
704                        &macro_def.name, &param_def.preceding_terminator, &param_def.name
705                    ),
706                ));
707            }
708        }
709        Ok(MacroInvocation {
710            macro_def,
711            param_values,
712        })
713    }
714}
715
/// The metacommands which `named_metacommand` can recognise by name.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Metacommand {
    /// The ☛☛DECIMAL metacommand.
    Decimal,
    /// The ☛☛OCTAL metacommand.
    Octal,
    /// A ☛☛PUNCH meta command.
    Punch,
    /// The ☛☛DEF metacommand, which begins a macro definition.
    DefineMacro,
    /// The ☛☛EMD metacommand, which ends a macro definition.
    EndMacroDefinition,
}
725
726fn named_metacommand<'a, I>(which: Metacommand) -> impl Parser<'a, I, (), ExtraWithoutContext<'a>>
727where
728    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
729{
730    let name_match = move |actual: &str| -> bool {
731        match which {
732            Metacommand::Decimal => {
733                matches!(actual, "DECIMAL" | "DECIMA" | "DECIM" | "DECI" | "DEC")
734            }
735            Metacommand::Octal => matches!(actual, "OCTAL" | "OCTA" | "OCT" | "OC"),
736            Metacommand::Punch => matches!(actual, "PUNCH" | "PUNC" | "PUN" | "PU"),
737            Metacommand::DefineMacro => actual == "DEF",
738            Metacommand::EndMacroDefinition => matches!(actual, "EMD" | "EM"),
739        }
740    };
741
742    let matching_metacommand_name = select! {
743        Tok::SymexSyllable(Script::Normal, name) if name_match(name.as_str()) => (),
744    };
745
746    just([Tok::Hand(Script::Normal), Tok::Hand(Script::Normal)])
747        .ignored()
748        .then_ignore(matching_metacommand_name)
749}
750
751fn metacommand<'a, 'b, I>(
752    grammar: &Grammar<'a, 'b, I>,
753) -> impl Parser<'a, I, ManuscriptMetaCommand, ExtraWithoutContext<'a>> + use<'a, 'b, I>
754where
755    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
756{
757    fn punch<'a, I>() -> impl Parser<'a, I, ManuscriptMetaCommand, ExtraWithoutContext<'a>>
758    where
759        I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
760    {
761        // We currently have a limitation in the interpretation of
762        // "AA" in the PUNCH metacommand.  The documentation clearly
763        // states that this should be an honest tag.  We currently
764        // accept only numeric literals.
765        named_metacommand(Metacommand::Punch)
766            .ignore_then(literal(Script::Normal).or_not())
767            .try_map(|aa, span| match helpers::punch_address(aa) {
768                Ok(punch) => Ok(ManuscriptMetaCommand::Punch(punch)),
769                Err(msg) => Err(Rich::custom(span, msg)),
770            })
771            .labelled("PUNCH command")
772    }
773
774    fn base_change<'a, I>() -> impl Parser<'a, I, ManuscriptMetaCommand, ExtraWithoutContext<'a>>
775    where
776        I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
777    {
778        choice((
779            named_metacommand(Metacommand::Decimal)
780                .to(ManuscriptMetaCommand::BaseChange(NumeralMode::Decimal)),
781            named_metacommand(Metacommand::Octal)
782                .to(ManuscriptMetaCommand::BaseChange(NumeralMode::Octal)),
783        ))
784        .labelled("base-change metacommand")
785    }
786
787    choice((
788        base_change(),
789        punch(),
790        macro_definition(grammar).map(ManuscriptMetaCommand::Macro),
791    ))
792    .labelled("metacommand")
793}
794
795pub(crate) fn instructions_with_comma_counts<I>(it: I) -> Vec<CommaDelimitedFragment>
796where
797    I: Iterator<Item = CommasOrInstruction>,
798{
799    /// Fold operation which estabishes an alternating pattern of
800    /// commas and instructions.
801    ///
802    /// Invariant: acc is non-empty, begins and ends with C(_)
803    /// and does not contain a consecutive pair of C(_) or a
804    /// consecutive pair of Inst(_).
805    fn fold_step(
806        mut acc: Vec<CommasOrInstruction>,
807        item: CommasOrInstruction,
808    ) -> Vec<CommasOrInstruction> {
809        fn null_instruction(span: Span) -> FragmentWithHold {
810            FragmentWithHold {
811                span,
812                holdbit: HoldBit::Unspecified,
813                fragment: InstructionFragment::Null(span),
814            }
815        }
816
817        match acc.last_mut() {
818            Some(CommasOrInstruction::C(tail_comma)) => match item {
819                CommasOrInstruction::C(maybe_commas) => {
820                    if tail_comma.is_none() {
821                        *tail_comma = maybe_commas;
822                    } else {
823                        let null_inst_span: Span = match (tail_comma, &maybe_commas) {
824                            (_, Some(ic)) => span(ic.span().start..ic.span().start),
825                            (Some(tc), _) => span(tc.span().start..tc.span().start),
826                            (None, None) => {
827                                unreachable!(
828                                    "should be no need to interpose a null instruction between two instances of zero commas"
829                                );
830                            }
831                        };
832                        acc.push(CommasOrInstruction::I(null_instruction(null_inst_span)));
833                        acc.push(CommasOrInstruction::C(maybe_commas));
834                    }
835                }
836                CommasOrInstruction::I(inst) => {
837                    acc.push(CommasOrInstruction::I(inst));
838                    acc.push(CommasOrInstruction::C(None));
839                }
840            },
841            Some(CommasOrInstruction::I(_)) => unreachable!("invariant was broken"),
842            None => unreachable!("invariant was not established"),
843        }
844        assert!(matches!(acc.first(), Some(CommasOrInstruction::C(_))));
845        assert!(matches!(acc.last(), Some(CommasOrInstruction::C(_))));
846        acc
847    }
848
849    let mut it = it.peekable();
850
851    let initial_accumulator: Vec<CommasOrInstruction> = vec![CommasOrInstruction::C({
852        match it.peek() {
853            None => {
854                return Vec::new();
855            }
856            Some(CommasOrInstruction::I(_)) => None,
857            Some(CommasOrInstruction::C(maybe_commas)) => {
858                let c = maybe_commas.clone();
859                it.next();
860                c
861            }
862        }
863    })];
864
865    let tmp = it.fold(initial_accumulator, fold_step);
866    let mut output: Vec<CommaDelimitedFragment> = Vec::with_capacity(tmp.len() / 2 + 1);
867    let mut it = tmp.into_iter().peekable();
868    loop {
869        let maybe_before_count = it.next();
870        let maybe_inst = it.next();
871        match (maybe_before_count, maybe_inst) {
872            (None, _) => {
873                break;
874            }
875            (Some(CommasOrInstruction::C(before_commas)), Some(CommasOrInstruction::I(inst))) => {
876                let after_commas: Option<Commas> = match it.peek() {
877                    Some(CommasOrInstruction::C(commas)) => commas.clone(),
878                    None => None,
879                    Some(CommasOrInstruction::I(_)) => {
880                        unreachable!("fold_step did not maintain its invariant")
881                    }
882                };
883                output.push(CommaDelimitedFragment::new(
884                    before_commas,
885                    inst,
886                    after_commas,
887                ));
888            }
889            (Some(CommasOrInstruction::C(_)), None) => {
890                // No instructions in the input.
891                break;
892            }
893            (Some(CommasOrInstruction::I(_)), _)
894            | (Some(CommasOrInstruction::C(_)), Some(CommasOrInstruction::C(_))) => {
895                unreachable!("fold_step did not maintain its invariant");
896            }
897        }
898    }
899    output
900}
901
902fn tag_definition<'a, I>() -> impl Parser<'a, I, Tag, ExtraWithoutContext<'a>> + Clone
903where
904    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
905{
906    named_symbol(SymexSyllableRule::Multiple, Script::Normal)
907        .map_with(|name, extra| Tag {
908            name,
909            span: extra.span(),
910        })
911        .then_ignore(just(Tok::Arrow(Script::Normal)))
912        .labelled("tag definition")
913}
914
915fn commas<'a, I>() -> impl Parser<'a, I, Commas, ExtraWithoutContext<'a>> + Clone
916where
917    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
918{
919    just(Tok::Comma(Script::Normal))
920        .repeated()
921        .at_least(1)
922        .at_most(3)
923        .count()
924        .map_with(|count, extra| {
925            let span = extra.span();
926            match count {
927                1 => Commas::One(span),
928                2 => Commas::Two(span),
929                3 => Commas::Three(span),
930                _ => unreachable!(),
931            }
932        })
933}
934
935fn maybe_hold<'a, I>() -> impl Parser<'a, I, Option<HoldBit>, ExtraWithoutContext<'a>> + Clone
936where
937    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
938{
939    choice((
940        one_of(Tok::Hold).to(HoldBit::Hold),
941        just(Tok::NotHold).to(HoldBit::NotHold),
942    ))
943    .or_not()
944    .labelled("instruction hold bit")
945}
946
/// The set of boxed parsers produced by `grammar()`.
///
/// These parsers are built together because they refer to each
/// other; holding them in one struct lets callers pick out the
/// entry point they need.
struct Grammar<'a, 'b, I>
where
    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
{
    /// Parses an equality (assignment).
    assignment: Boxed<'a, 'b, I, Equality, ExtraWithoutContext<'a>>,
    /// Parses zero or more tag definitions followed by a (possibly
    /// comma-delimited) program instruction.
    tagged_program_instruction: Boxed<'a, 'b, I, TaggedProgramInstruction, ExtraWithoutContext<'a>>,
    /// Parses an arithmetic expression in normal script; symexes
    /// may contain spaces.
    normal_arithmetic_expression_allowing_spaces:
        Boxed<'a, 'b, I, ArithmeticExpression, ExtraWithoutContext<'a>>,
    /// Parses an arithmetic expression in subscript; symexes may
    /// contain spaces.
    subscript_arithmetic_expression_allowing_spaces:
        Boxed<'a, 'b, I, ArithmeticExpression, ExtraWithoutContext<'a>>,
    /// Parses an arithmetic expression in superscript; symexes may
    /// contain spaces.
    superscript_arithmetic_expression_allowing_spaces:
        Boxed<'a, 'b, I, ArithmeticExpression, ExtraWithoutContext<'a>>,
    /// Parses a single instruction fragment.  Only present in test
    /// builds.
    #[cfg(test)]
    instruction_fragment: Boxed<'a, 'b, I, InstructionFragment, ExtraWithoutContext<'a>>,
}
962
963fn grammar<'a: 'b, 'b, I>() -> Grammar<'a, 'b, I>
964where
965    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
966{
967    const ALLOW_SPACES: bool = true;
968
969    let mut comma_delimited_instructions = Recursive::declare();
970    let tagged_program_instruction = (tag_definition()
971        .repeated()
972        .collect()
973        .then(comma_delimited_instructions.clone()))
974    .map_with(
975        |(tags, fragments): (Vec<Tag>, OneOrMore<CommaDelimitedFragment>), extra| {
976            let span: Span = extra.span();
977            if let Some(t) = tags.first() {
978                assert_eq!(t.span.start, span.start);
979            }
980            TaggedProgramInstruction {
981                span: extra.span(),
982                tags,
983                instruction: UntaggedProgramInstruction::from(fragments),
984            }
985        },
986    )
987    .labelled(
988        "optional tag definition followed by a (possibly comma-delimited) program instructions",
989    );
990
991    // Parse {E} where E is some expression.  Since tags are
992    // allowed inside RC-blocks, we should parse E as a
993    // TaggedProgramInstruction.  But if we try to do that without
994    // using recursive() we will blow the stack, unfortunately.
995    let register_containing = tagged_program_instruction
996        .clone()
997        .delimited_by(
998            just(Tok::LeftBrace(Script::Normal)),
999            just(Tok::RightBrace(Script::Normal)),
1000        )
1001        .map_with(|tagged_instruction, extra| {
1002            Atom::RcRef(
1003                extra.span(),
1004                RegistersContaining::from_words(OneOrMore::new(RegisterContaining::from(
1005                    tagged_instruction,
1006                ))),
1007            )
1008        })
1009        .labelled("RC-word");
1010
1011    let arith_expr = |allow_spaces: bool, script_required: Script| {
1012        {
1013            let symex_syllable_rule = if allow_spaces {
1014                SymexSyllableRule::Multiple
1015            } else {
1016                SymexSyllableRule::OneOnly
1017            };
1018            // We use recursive here to prevent the parser blowing the stack
1019            // when trying to parse inputs which have parentheses - that is,
1020            // inputs that require recursion.
1021            recursive(move |arithmetic_expr| {
1022                // Parse (E) where E is some expression.
1023                let parenthesised_arithmetic_expression = arithmetic_expr // this is the recursive call
1024                    .clone()
1025                    .delimited_by(
1026                        just(Tok::LeftParen(script_required)),
1027                        just(Tok::RightParen(script_required)),
1028                    )
1029                    .map_with(move |expr, extra| {
1030                        Atom::Parens(extra.span(), script_required, Box::new(expr))
1031                    })
1032                    .labelled("parenthesised arithmetic expression");
1033
1034                // Parse a literal, symbol, #, or (recursively) an expression in parentheses.
1035                let naked_atom = choice((
1036                    literal(script_required).map(Atom::from),
1037                    opcode().map(Atom::from),
1038                    here(script_required).map(Atom::SymbolOrLiteral),
1039                    named_symbol(symex_syllable_rule, script_required).map_with(
1040                        move |symbol_name, extra| {
1041                            Atom::SymbolOrLiteral(SymbolOrLiteral::Symbol(
1042                                script_required,
1043                                symbol_name,
1044                                extra.span(),
1045                            ))
1046                        },
1047                    ),
1048                    register_containing,
1049                    parenthesised_arithmetic_expression,
1050                ))
1051                .boxed();
1052
1053                let signed_atom = maybe_sign(script_required).then(naked_atom).map_with(
1054                    |(possible_sign, magnitude), extra| SignedAtom {
1055                        span: extra.span(),
1056                        negated: matches!(possible_sign, Some((Sign::Minus, _))),
1057                        magnitude,
1058                    },
1059                );
1060
1061                // Parse an arithmetic operator (e.g. plus, times) followed by an atom.
1062                let operator_with_signed_atom = operator(script_required).then(signed_atom.clone());
1063
1064                // An arithmetic expression is a signed atom followed by zero or
1065                // more pairs of (arithmetic operator, signed atom).
1066                signed_atom
1067                    .then(operator_with_signed_atom.repeated().collect())
1068                    .map(|(head, tail)| ArithmeticExpression::with_tail(head, tail))
1069            })
1070        }
1071        .labelled("arithmetic expression")
1072    };
1073
1074    // Parse a values (symbolic or literal) or arithmetic expression.
1075    //
1076    // BAT² is not an identifier but a sequence[1] whose value is
1077    // computed by OR-ing the value of the symex BAT with the value of
1078    // the literal "²" (which is 2<<30, or 0o20_000_000_000).  But BAT²
1079    // is itself not an arithmetic_expression (because there is a script
1080    // change).
1081    //
1082    // You could argue that (BAT²) should be parsed as an atom.  Right
1083    // now that doesn't work because all the elements of an expression
1084    // (i.e. everything within the parens) need to have the same script.
1085    let program_instruction_fragment = recursive(|program_instruction_fragment| {
1086        // Parse the pipe-construct described in the User Handbook
1087        // section 2-2.8 "SPECIAL SYMBOLS" as "ₚ|ₜ" (though in reality
1088        // the pipe should also be subscript and in their example they
1089        // use a subscript q).
1090        //
1091        // The Handbook is not explicit on whether the "ₚ" or "ₜ" can
1092        // contain spaces.  We will assume not, for simplicity (at
1093        // least for the time being).
1094        //
1095        // "ADXₚ|ₜQ" should be equvialent to ADXₚ{Qₜ}*.  So we need to
1096        // generate an RC-word containing Qₜ.
1097
1098        let spanned_p_fragment = symbol_or_literal(Script::Sub) // this is p
1099            .map_with(|p, extra| SpannedSymbolOrLiteral {
1100                item: p,
1101                span: extra.span(),
1102            })
1103            .boxed();
1104
1105        let spanned_tq_fragment = symbol_or_literal(Script::Sub) // this is t
1106            .map_with(|t, extra| SpannedSymbolOrLiteral {
1107                item: t,
1108                span: extra.span(),
1109            })
1110            .then(
1111                program_instruction_fragment // this is Q
1112                    .clone()
1113                    .map_with(|q, extra| (q, extra.span())),
1114            )
1115            .boxed();
1116        let pipe_construct = spanned_p_fragment
1117            .then_ignore(just(Tok::Pipe(Script::Sub)))
1118            .then(spanned_tq_fragment)
1119            .map(make_pipe_construct)
1120            .labelled("pipe construct");
1121
1122        let single_script_fragment = |script_required| {
1123            arith_expr.clone()(true, script_required).map(InstructionFragment::from)
1124        };
1125
1126        // A configuration syllable is not permitted to contain spaces
1127        // (per section 6-1.2 "INSTRUCTION WORDS" of the Users
1128        // Handbook).  So we need to prevent symex matching accepting
1129        // spaces.
1130        let config_value = choice((
1131            just(Tok::DoublePipe(Script::Normal)).ignore_then(
1132                arith_expr.clone()(false, Script::Normal).map(|expr| ConfigValue {
1133                    already_superscript: false,
1134                    expr,
1135                }),
1136            ),
1137            arith_expr.clone()(false, Script::Super).map(|expr| ConfigValue {
1138                already_superscript: true,
1139                expr,
1140            }),
1141        ))
1142        .try_map_with(|config_val, extra| {
1143            let span: Span = extra.span();
1144            let range: Range<usize> = span.into();
1145            let slice: &str = &extra.state().body[range];
1146
1147            if slice.contains(' ') {
1148                // Spaces in configuration values are prohibited by
1149                // the rule given in the Users handbook, section 6-2.1.
1150                Err(Rich::custom(
1151                    span,
1152                    format!("configuration value '{slice}' should not contain spaces"),
1153                ))
1154            } else {
1155                Ok(InstructionFragment::Config(config_val))
1156            }
1157        })
1158        .labelled("configuration value");
1159
1160        choice((
1161            pipe_construct,
1162            single_script_fragment(Script::Normal),
1163            single_script_fragment(Script::Sub),
1164            asterisk_indirection_fragment(),
1165            config_value,
1166        ))
1167        .labelled("program instruction")
1168    });
1169
1170    comma_delimited_instructions.define({
1171        let untagged_program_instruction = maybe_hold()
1172            .then(program_instruction_fragment.clone())
1173            .map_with(
1174                |(maybe_hold, fragment): (Option<HoldBit>, InstructionFragment), extra| {
1175                    FragmentWithHold {
1176                        span: extra.span(),
1177                        holdbit: maybe_hold.unwrap_or(HoldBit::Unspecified),
1178                        fragment,
1179                    }
1180                },
1181            );
1182
1183        choice((
1184            commas().map(|c| CommasOrInstruction::C(Some(c))),
1185            untagged_program_instruction
1186                .clone()
1187                .map(CommasOrInstruction::I),
1188        ))
1189        .repeated()
1190        .at_least(1)
1191        .collect::<Vec<CommasOrInstruction>>()
1192        .map(|ci_vec| instructions_with_comma_counts(ci_vec.into_iter()))
1193        .map(|cdfs| match OneOrMore::try_from_iter(cdfs.into_iter()) {
1194            Ok(cdfs) => cdfs,
1195            Err(_) => {
1196                unreachable!("instructions_with_comma_counts generated an empty output");
1197            }
1198        })
1199    });
1200
1201    // Assginments are called "equalities" in the TX-2 Users Handbook.
1202    // See section 6-2.2, "SYMEX DEFINITON - TAGS - EQUALITIES -
1203    // AUTOMATIC ASSIGNMENT".
1204    let assignment = (symex::parse_symex(SymexSyllableRule::Multiple, Script::Normal)
1205        .then_ignore(just(Tok::Equals(Script::Normal)))
1206        .then(comma_delimited_instructions.clone().map_with(
1207            |val: OneOrMore<CommaDelimitedFragment>, extra| {
1208                EqualityValue::from((extra.span(), UntaggedProgramInstruction::from(val)))
1209            },
1210        )))
1211    .map_with(|(name, value), extra| Equality {
1212        span: extra.span(),
1213        name,
1214        value,
1215    })
1216    .labelled("equality (assignment)");
1217
1218    let tagged_program_instruction = tagged_program_instruction.clone();
1219
1220    Grammar {
1221        assignment: assignment.boxed(),
1222        tagged_program_instruction: tagged_program_instruction.boxed(),
1223        normal_arithmetic_expression_allowing_spaces: arith_expr.clone()(
1224            ALLOW_SPACES,
1225            Script::Normal,
1226        )
1227        .boxed(),
1228        superscript_arithmetic_expression_allowing_spaces: arith_expr.clone()(
1229            ALLOW_SPACES,
1230            Script::Super,
1231        )
1232        .boxed(),
1233        subscript_arithmetic_expression_allowing_spaces: arith_expr.clone()(
1234            ALLOW_SPACES,
1235            Script::Sub,
1236        )
1237        .boxed(),
1238        #[cfg(test)]
1239        instruction_fragment: program_instruction_fragment.boxed(),
1240    }
1241}
1242
/// Test-only entry point: the parser for a tagged program
/// instruction, taken from a freshly built grammar.
#[cfg(test)]
fn tagged_instruction<'a, I>()
-> impl Parser<'a, I, TaggedProgramInstruction, ExtraWithoutContext<'a>>
where
    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
{
    let full_grammar = grammar();
    full_grammar.tagged_program_instruction
}
1251
1252fn manuscript_line<'a, I>() -> impl Parser<'a, I, ManuscriptLine, ExtraWithoutContext<'a>>
1253where
1254    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1255{
1256    fn execute_metacommand(state: &mut NumeralMode, cmd: &ManuscriptMetaCommand) {
1257        match cmd {
1258            ManuscriptMetaCommand::Punch(_) | ManuscriptMetaCommand::Macro(_) => {
1259                // Instead of executing this metacommand as we parse it,
1260                // we simply return it as part of the parser output, and
1261                // it is executed by the driver.
1262            }
1263            ManuscriptMetaCommand::BaseChange(new_base) => state.set_numeral_mode(*new_base),
1264        }
1265    }
1266
1267    fn parse_and_execute_metacommand<'a, 'b, I>(
1268        grammar: &Grammar<'a, 'b, I>,
1269    ) -> impl Parser<'a, I, ManuscriptLine, ExtraWithoutContext<'a>> + use<'a, 'b, I>
1270    where
1271        I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1272    {
1273        metacommand(grammar)
1274            .map_with(|cmd, extra| {
1275                execute_metacommand(&mut extra.state().numeral_mode, &cmd);
1276                ManuscriptLine::Meta(cmd)
1277            })
1278            .labelled("metacommand")
1279    }
1280
1281    fn build_code_line(
1282        (maybe_origin, statement): (Option<Origin>, TaggedProgramInstruction),
1283    ) -> ManuscriptLine {
1284        match maybe_origin {
1285            None => ManuscriptLine::StatementOnly(statement),
1286            Some(origin) => ManuscriptLine::OriginAndStatement(origin, statement),
1287        }
1288    }
1289
1290    fn line<'a, I>() -> impl Parser<'a, I, ManuscriptLine, ExtraWithoutContext<'a>>
1291    where
1292        I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1293    {
1294        fn origin<'a, I>() -> impl Parser<'a, I, Origin, ExtraWithoutContext<'a>>
1295        where
1296            I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1297        {
1298            /// An address expression is a literal value or a symex.  That is I
1299            /// think it's not required that an arithmetic expression
1300            /// (e.g. "5+BAR") be accepted in an origin notation (such as
1301            /// "<something>|").
1302            fn literal_address_expression<'a, I>()
1303            -> impl Parser<'a, I, Origin, ExtraWithoutContext<'a>>
1304            where
1305                I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1306            {
1307                literal(Script::Normal)
1308                    .then_ignore(just(Tok::Pipe(Script::Normal)))
1309                    .try_map(|lit, span| match Address::try_from(lit.value()) {
1310                        Ok(addr) => Ok(Origin::Literal(span, addr)),
1311                        Err(e) => Err(Rich::custom(span, format!("not a valid address: {e}"))),
1312                    })
1313                    .labelled("literal address expression")
1314            }
1315
1316            fn symbolic_address_expression<'a, I>()
1317            -> impl Parser<'a, I, Origin, ExtraWithoutContext<'a>>
1318            where
1319                I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1320            {
1321                named_symbol(SymexSyllableRule::Multiple, Script::Normal)
1322                    .then_ignore(just(Tok::Pipe(Script::Normal)))
1323                    .map_with(|name, extra| Origin::Symbolic(extra.span(), name))
1324                    .labelled("symbolic address expression")
1325            }
1326
1327            // An origin specification is an expression followed by a
1328            // (normal-case) pipe symbol.
1329            choice((literal_address_expression(), symbolic_address_expression()))
1330                .labelled("origin specification")
1331        }
1332
1333        let grammar = grammar();
1334
1335        let optional_origin_with_statement = origin()
1336            .or_not()
1337            .then(grammar.tagged_program_instruction.clone())
1338            .map(build_code_line)
1339            .labelled("statement with origin");
1340
1341        // TODO: also allowed: "T1->T2->ORIGIN|"
1342        let origin_only = origin().map(ManuscriptLine::OriginOnly);
1343        let tags_only = tag_definition()
1344            .repeated()
1345            .at_least(1)
1346            .collect()
1347            .map(ManuscriptLine::TagsOnly);
1348        let equality = grammar.assignment.clone().map(ManuscriptLine::Eq);
1349
1350        choice((
1351            // We have to parse an assignment first here, in order to
1352            // accept "FOO=2" as an assignment rather than the instruction
1353            // fragment "FOO" followed by a syntax error.
1354            equality,
1355            macro_invocation().map(ManuscriptLine::Macro),
1356            // Ignore whitespace after the metacommand but not before it.
1357            parse_and_execute_metacommand(&grammar),
1358            optional_origin_with_statement,
1359            // Because we prefer to parse a statement if one exists,
1360            // the origin_only alternative has to appear after the
1361            // alternative which parses a statement.
1362            origin_only,
1363            // Similarly for lines containing only tag efinitions.
1364            tags_only,
1365        ))
1366    }
1367
1368    line()
1369}
1370
1371fn end_of_line<'a, I>() -> impl Parser<'a, I, (), ExtraWithoutContext<'a>>
1372where
1373    I: Input<'a, Token = Tok, Span = Span>,
1374{
1375    let one_end_of_line = just(Tok::Newline).labelled("end-of-line").ignored();
1376
1377    one_end_of_line
1378        .repeated()
1379        .at_least(1)
1380        .ignored()
1381        .labelled("comment or end-of-line")
1382}
1383
1384fn terminated_manuscript_line<'a, I>()
1385-> impl Parser<'a, I, Option<(Span, ManuscriptLine)>, ExtraWithoutContext<'a>>
1386where
1387    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1388{
1389    // If we support INSERT, DELETE, REPLACE, we will need to
1390    // separate the processing of the metacommands and the
1391    // generation of the assembled code.
1392    manuscript_line()
1393        .or_not()
1394        .map_with(|maybe_line, extra| maybe_line.map(|line| (extra.span(), line)))
1395        .then_ignore(end_of_line())
1396}
1397
1398pub(crate) fn source_file<'a, I>() -> impl Parser<'a, I, SourceFile, ExtraWithoutContext<'a>>
1399where
1400    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
1401{
1402    fn inconsistency_error<'src>(
1403        span: Span,
1404        name: &SymbolName,
1405        what: &str,
1406    ) -> chumsky::error::Rich<'src, lexer::Token> {
1407        Rich::custom(
1408            span,
1409            format!("internal error: inconsistent parser state for macro {name}: {what}"),
1410        )
1411    }
1412
1413    fn check_consistent<'a>(
1414        macro_def_in_source_file: Option<&MacroDefinition>,
1415        macro_def_in_state: Option<&MacroDefinition>,
1416    ) -> Result<(), chumsky::error::Rich<'a, lexer::Token>> {
1417        match (macro_def_in_source_file, macro_def_in_state) {
1418            (None, None) => {
1419                panic!("all_name is incorrect");
1420            }
1421            (None, Some(state_def)) => Err(inconsistency_error(
1422                state_def.span,
1423                &state_def.name,
1424                "missing from SourceFile output",
1425            )),
1426            (Some(source_file_def), None) => Err(inconsistency_error(
1427                source_file_def.span,
1428                &source_file_def.name,
1429                "missing from State",
1430            )),
1431            (Some(source_file_def), Some(state_def)) => {
1432                if source_file_def == state_def {
1433                    Ok(())
1434                } else {
1435                    Err(inconsistency_error(
1436                        source_file_def.span,
1437                        &source_file_def.name,
1438                        "inconsistently defined",
1439                    ))
1440                }
1441            }
1442        }
1443    }
1444
1445    terminated_manuscript_line()
1446        .repeated()
1447        .collect()
1448        .try_map_with(|lines: Vec<Option<(Span, ManuscriptLine)>>, extra| {
1449            // Filter out empty lines.
1450            let lines: Vec<(Span, ManuscriptLine)> = lines.into_iter().flatten().collect();
1451            let source_file: SourceFile = manuscript_lines_to_source_file(lines)?;
1452            let state_macros: BTreeMap<SymbolName, MacroDefinition> =
1453                extra.state().macros().clone();
1454            let all_names: BTreeSet<&SymbolName> = source_file
1455                .macros
1456                .keys()
1457                .chain(state_macros.keys())
1458                .collect();
1459            for name in all_names {
1460                check_consistent(
1461                    source_file.macros.get(name),
1462                    extra.state().macros().get(name),
1463                )?;
1464            }
1465            Ok(source_file)
1466        })
1467}
1468
/// A chumsky mapped input whose underlying input is a `Stream` over
/// an owned vector of `(Tok, SimpleSpan)` pairs, and whose mapping
/// function is a plain function pointer from `I` to `O`.
type Mig<I, O> = chumsky::input::MappedInput<
    Tok,
    SimpleSpan,
    chumsky::input::Stream<std::vec::IntoIter<(Tok, SimpleSpan)>>,
    fn(I) -> O,
>;
/// The input type consumed by parsers passed to
/// `tokenize_and_parse_with`: a `Mig` whose mapping function takes
/// and returns a `(Tok, SimpleSpan)` pair.
pub(crate) type Mi = Mig<(Tok, SimpleSpan), (Tok, SimpleSpan)>;
1476
1477pub(crate) fn tokenize_and_parse_with<'a, P, T, F>(
1478    input: &'a str,
1479    mut setup: F,
1480    parser: P,
1481) -> (Option<T>, Vec<Rich<'a, Tok>>)
1482where
1483    F: FnMut(&mut State),
1484    P: Parser<'a, Mi, T, ExtraWithoutContext<'a>>,
1485{
1486    let mut state = State::new(input, NumeralMode::default());
1487    setup(&mut state);
1488
1489    // These conversions are adapted from the Logos example in the
1490    // Chumsky documentation.
1491    let scanner = lexer::Lexer::new(input).spanned();
1492    let tokens: Vec<(Tok, SimpleSpan)> = scanner
1493        .map(|item: (Tok, Range<usize>)| -> (Tok, Span) {
1494            match item {
1495                (Tok::Tab, span) => {
1496                    // The basic problem here is that the TX-2's
1497                    // M4 assembler allows a space to occur in the
1498                    // middle of a symex.  We implement this in
1499                    // the parser by returning the individual
1500                    // parts from the lexer and having the parser
1501                    // join them together.  The lexer doesn't
1502                    // return spaces.  In order to prevent the
1503                    // parser joining together "XY\tZ" in a
1504                    // similar way we would need to return TAB as
1505                    // a lexeme.  The problem with doing that
1506                    // though is that the parser would have to
1507                    // permit the TAB token between regular
1508                    // tokens everywhere in the grammar except
1509                    // between two symex components.  That would
1510                    // make the grammar difficult to maintain (and
1511                    // difficult to specify without bugs).
1512                    (Tok::Error(lexer::ErrorTokenKind::Tab), span.into())
1513                }
1514                (tok, span) => {
1515                    // Turn the `Range<usize>` spans logos gives us into
1516                    // chumsky's `SimpleSpan` via `Into`, because it's
1517                    // easier to work with
1518                    (tok, span.into())
1519                }
1520            }
1521        })
1522        .collect();
1523    let end_span: SimpleSpan = SimpleSpan::new(
1524        0,
1525        tokens.iter().map(|(_, span)| span.end).max().unwrap_or(0),
1526    );
1527    let token_stream: Mi = Stream::from_iter(tokens).map(end_span, |unchanged| unchanged);
1528    parser
1529        .parse_with_state(token_stream, &mut state)
1530        .into_output_errors()
1531}
1532
1533pub(crate) fn parse_source_file(
1534    source_file_body: &str,
1535    setup: fn(&mut State),
1536) -> (Option<SourceFile>, Vec<Rich<'_, Tok>>) {
1537    let parser = source_file();
1538    tokenize_and_parse_with(source_file_body, setup, parser)
1539}
1540
1541// Local Variables:
1542// mode: rustic
1543// lsp-rust-analyzer-server-display-inlay-hints: nil
1544// End: