assembler/parser/
symex.rs

1//! Helper functions for parsing symex names.
2//!
3//! See section 6-2.3 "RULES FOR SYMEX FORMATION".
4//!
5use base::charset::Script;
6use chumsky::Parser;
7use chumsky::input::ValueInput;
8use chumsky::prelude::*;
9
10use super::super::lexer::{DOT_CHAR, DOT_STR};
11use super::super::span::Span;
12use super::super::symbol::SymbolName;
13use super::helpers::{self};
14use super::{ExtraWithoutContext, Tok, opcode_code};
15
16/// Squeze spaces from a string to make a canonical symex name.
17///
18/// The caller must ensure that the passed-in string is indeed a
19/// single symex name.  Meaning that the input `"TYPE A"` is valid but
20/// `"A TYPE"` and `"ADD Y"` are not, since neither the AE register
21/// name `"A"`nor the opcode `"ADD"` can be the first syllavble of a
22/// symex name containing a space.
23fn canonical_symbol_name(s: &str) -> SymbolName {
24    // TODO: avoid copy where possible.
25    SymbolName {
26        canonical: s
27            .chars()
28            .filter(|ch: &char| -> bool { *ch != ' ' })
29            .collect(),
30    }
31}
32
33/// Decide whether the passed in identifier is a reserved identifier
34/// (i.e. an AE element name or an opcode name).
35fn is_reserved_identifier(ident: &str) -> bool {
36    helpers::is_arithmetic_element_register_name(ident) || opcode_code(ident).is_some()
37}
38
39// Compound chars are not supported at the moment, see docs/assembler/index.md.
40pub(super) fn digits_as_symex<'a, I>(
41    script_required: Script,
42) -> impl Parser<'a, I, (String, Option<char>), ExtraWithoutContext<'a>> + Clone
43where
44    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
45{
46    select! {
47        Tok::Digits(script, literal) if script == script_required => literal,
48    }
49    .map(|literal| {
50        let maybe_dot: Option<char> = if literal.has_trailing_dot() {
51            Some(DOT_CHAR)
52        } else {
53            None
54        };
55        (literal.take_digits(), maybe_dot)
56    })
57}
58
59// Compound chars are not supported at the moment, see docs/assembler/index.md.
60pub(super) fn symex_syllable<'a, I>(
61    script_required: Script,
62) -> impl Parser<'a, I, String, ExtraWithoutContext<'a>> + Clone
63where
64    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
65{
66    fn append_possible_dot((mut prefix, maybe_dot): (String, Option<char>)) -> String {
67        match maybe_dot {
68            Some(dot) => {
69                prefix.push(dot);
70                prefix
71            }
72            None => prefix,
73        }
74    }
75
76    let one_dot = just(Tok::Dot(script_required))
77        .to(DOT_CHAR)
78        .labelled(DOT_STR);
79
80    let maybe_dot = one_dot.clone().or_not();
81    let without_dot = select! {
82        Tok::SymexSyllable(script, name) if script == script_required => name,
83    };
84
85    // The dot is a macro terminator.  So eventually we will need to
86    // distinguish two meanings of "X@dot@".  The first being a symex
87    // named "X@dot@" and the second being a reference to a macro
88    // called X with the dot as its terminator.
89    choice((
90        without_dot.then(maybe_dot).map(append_possible_dot),
91        digits_as_symex(script_required).map(append_possible_dot),
92        one_dot.to(super::lexer::DOT_STR.to_string()),
93    ))
94    .labelled("symex syllable")
95}
96
97fn parse_symex_non_reserved_syllable<'a, I>(
98    script_required: Script,
99) -> impl Parser<'a, I, String, ExtraWithoutContext<'a>> + Clone
100where
101    I: Input<'a, Token = Tok, Span = SimpleSpan> + ValueInput<'a>,
102{
103    symex_syllable(script_required).try_map(move |syllable, span| {
104        if is_reserved_identifier(&syllable) {
105            Err(Rich::custom(
106                span,
107                format!("'{syllable}' is a reserved identifier"),
108            ))
109        } else {
110            Ok(syllable)
111        }
112    })
113}
114
115/// Distinguishes a single-syllable from a multi-syllable symex.
116#[derive(Debug, Eq, PartialEq, Copy, Clone)]
117pub(super) enum SymexSyllableRule {
118    /// A single syllable symex (and so this includes reserved
119    /// identifiers such as "A", "B", "ADD", "TLY").
120    OneOnly,
121    /// A multi syllable symex (which might include a reserved
122    /// identifier, but not as the first syllable).
123    Multiple,
124}
125
126pub(super) fn parse_multi_syllable_symex<'a: 'b, 'b, I>(
127    rule: SymexSyllableRule,
128    script_required: Script,
129) -> Boxed<'a, 'b, I, String, ExtraWithoutContext<'a>>
130where
131    I: Input<'a, Token = Tok, Span = SimpleSpan> + ValueInput<'a>,
132{
133    // Pass by value here is harmless and simplifies the foldl below.
134    #[allow(clippy::needless_pass_by_value)]
135    fn concat_strings(mut s: String, next: String) -> String {
136        s.push_str(&next);
137        s
138    }
139
140    match rule {
141        SymexSyllableRule::OneOnly => symex_syllable(script_required)
142            .labelled("single-syllable symex")
143            .boxed(),
144        SymexSyllableRule::Multiple => parse_symex_non_reserved_syllable(script_required)
145            .foldl(symex_syllable(script_required).repeated(), concat_strings)
146            .labelled("multi-syllable symex")
147            .boxed(),
148    }
149}
150
151/// Parse a symex having the specified script, according to `rule`.
152///
153/// If the input doesn't have the expected script, the parser fails.
154pub(super) fn parse_symex<'a, I>(
155    rule: SymexSyllableRule,
156    script_required: Script,
157) -> impl Parser<'a, I, SymbolName, ExtraWithoutContext<'a>> + Clone
158where
159    I: Input<'a, Token = Tok, Span = Span> + ValueInput<'a>,
160{
161    choice((
162        parse_multi_syllable_symex(rule, script_required),
163        parse_symex_reserved_syllable(script_required),
164    ))
165    .map(|s| canonical_symbol_name(&s))
166    .labelled("symbol name")
167}
168
169pub(super) fn parse_symex_reserved_syllable<'a, I>(
170    script_required: Script,
171) -> impl Parser<'a, I, String, ExtraWithoutContext<'a>> + Clone
172where
173    I: Input<'a, Token = Tok, Span = SimpleSpan> + ValueInput<'a>,
174{
175    symex_syllable(script_required)
176        .try_map(move |syllable, span| {
177            if is_reserved_identifier(&syllable) {
178                Ok(syllable)
179            } else {
180                Err(Rich::custom(span, "expected reserved syllable".to_string()))
181            }
182        })
183        .labelled("reserved symex")
184}