assembler/
driver.rs

1//! Invoke the various passes of the assembler.
2mod output;
3
4#[cfg(test)]
5mod tests;
6
7use std::collections::BTreeMap;
8use std::ffi::OsStr;
9use std::fs::OpenOptions;
10use std::io::{BufReader, BufWriter, Read};
11use std::path::{Path, PathBuf};
12
13use chumsky::error::Rich;
14use tracing::{Level, event, span};
15
16#[cfg(test)]
17use super::ast::{
18    ArithmeticExpression, Atom, CommaDelimitedFragment, HoldBit, InstructionFragment,
19    InstructionSequence, LiteralValue, TaggedProgramInstruction, UntaggedProgramInstruction,
20};
21use super::ast::{Origin, RcUpdater};
22use super::collections::OneOrMore;
23use super::directive::Directive;
24use super::eval::Evaluate;
25use super::eval::HereValue;
26use super::eval::ScopeIdentifier;
27use super::eval::{EvaluationContext, RcBlock, extract_final_equalities};
28use super::lexer;
29use super::listing::{Listing, ListingLine, ListingWithBody};
30#[cfg(test)]
31use super::manuscript::ManuscriptBlock;
32#[cfg(test)]
33use super::manuscript::PunchCommand;
34use super::manuscript::SourceFile;
35use super::memorymap::{
36    BlockPosition, MemoryMap, RcAllocator, RcWordAllocationFailure, RcWordSource,
37};
38use super::parser::parse_source_file;
39use super::source::Source;
40use super::source::{LineAndColumn, WithLocation};
41#[cfg(test)]
42use super::span::span;
43use super::span::{Span, Spanned};
44use super::state::{NumeralMode, State};
45use super::symbol::SymbolName;
46use super::symtab::{
47    ExplicitSymbolTable, FinalSymbolDefinition, FinalSymbolTable, FinalSymbolType,
48    ImplicitSymbolTable, IndexRegisterAssigner, assign_default_rc_word_tags,
49};
50use super::types::{AssemblerFailure, IoAction, IoFailed, IoTarget, ProgramError};
51use base::prelude::{Address, IndexBy, Unsigned18Bit, Unsigned36Bit};
52use base::subword;
53pub use output::write_user_program;
54
55#[cfg(test)]
56use base::charset::Script;
57#[cfg(test)]
58use base::u36;
59
/// Represents the meta commands which are still relevant in the
/// directive.  Excludes things like the PUNCH meta command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DirectiveMetaCommand {
    /// A meta command which is not valid (see the example alongside).
    Invalid, // e.g."☛☛BOGUS"
    /// Carries a [`NumeralMode`]; presumably the numeral mode being
    /// switched to (confirm against the parser).
    BaseChange(NumeralMode),
}
67
/// Indicates what kind of output the user wants.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct OutputOptions {
    // TODO: implement arguments of the LIST, PLIST, TYPE
    // metacommands.
    /// When set, a listing is printed in addition to the binary output.
    pub list: bool,
}

impl OutputOptions {
    // Passing by value is deliberate here; see the comment on `merge`.
    #![allow(clippy::needless_pass_by_value)]

    // Combine two `OutputOptions` values into one.  An option is
    // enabled in the result when it is enabled in either input.
    //
    // Both `self` and `other` are taken by value on purpose: the
    // caller is left holding only the merged result and cannot
    // accidentally keep using one of the inputs.
    #[must_use]
    fn merge(self, other: OutputOptions) -> OutputOptions {
        let OutputOptions { list: ours } = self;
        let OutputOptions { list: theirs } = other;
        OutputOptions {
            list: ours || theirs,
        }
    }
}
88
89/// Pass 1 converts the program source into an abstract syntax representation.
90fn assemble_pass1<'a, 'b: 'a>(
91    source_file_body: &'b Source<'a>,
92    errors: &mut Vec<Rich<'a, lexer::Token>>,
93) -> (Option<SourceFile>, OutputOptions) {
94    fn setup(state: &mut State) {
95        // Octal is actually the default numeral mode, we just call
96        // set_numeral_mode here to keep Clippy happy until we
97        // implement ☛☛DECIMAL and ☛☛OCTAL.
98        state.numeral_mode.set_numeral_mode(NumeralMode::Decimal); // appease Clippy
99        state.numeral_mode.set_numeral_mode(NumeralMode::Octal);
100    }
101
102    let span = span!(Level::ERROR, "assembly pass 1");
103    let _enter = span.enter();
104    let options = OutputOptions { list: false };
105
106    let (mut sf, mut new_errors) = parse_source_file(source_file_body.as_str(), setup);
107    errors.append(&mut new_errors);
108
109    if let Some(source_file) = sf.as_mut()
110        && let Err(tag_errors) = source_file.build_local_symbol_tables()
111    {
112        errors.extend(
113            tag_errors
114                .into_iter()
115                .map(|tag_err| Rich::custom(tag_err.span(), tag_err.to_string())),
116        );
117    }
118    (sf, options)
119}
120
/// Describes the result of running the first (and possibly the
/// second) pass of the assembler.
#[derive(Debug, PartialEq, Eq)]
enum AssemblerPass1Or2Output<'a> {
    /// Pass 1 produced no source file.  The `Ok` case carries the
    /// (non-empty) diagnostics; the `Err` case carries an assembler
    /// failure.
    Pass1Failed(Result<OneOrMore<Rich<'a, lexer::Token>>, AssemblerFailure>),
    /// Pass 1 produced a source file but pass 2 returned an error.
    Pass2Failed(AssemblerFailure),
    /// Both passes ran.  Carries the diagnostics collected during
    /// pass 1 (callers check whether this is empty), the output
    /// options from pass 1, and the output of pass 2.
    Success(Vec<Rich<'a, lexer::Token>>, OutputOptions, Pass2Output<'a>),
}
129
130/// Assemble a non-empty input.
131fn assemble_nonempty_input<'a, 'b: 'a>(input: &'b Source<'a>) -> AssemblerPass1Or2Output<'a> {
132    let mut errors: Vec<Rich<'_, lexer::Token>> = Vec::new();
133    let (maybe_source_file, output_options) = assemble_pass1(input, &mut errors);
134    match maybe_source_file {
135        None => match OneOrMore::try_from_vec(errors) {
136            Ok(errors) => AssemblerPass1Or2Output::Pass1Failed(Ok(errors)),
137            Err(_) => {
138                unreachable!(
139                    "assemble_pass1 returned no SourceFile instance but there were no output errors either"
140                );
141            }
142        },
143        Some(source_file) => match assemble_pass2(source_file, input) {
144            Err(e) => AssemblerPass1Or2Output::Pass2Failed(e),
145            Ok(p2output) => AssemblerPass1Or2Output::Success(errors, output_options, p2output),
146        },
147    }
148}
149
/// This test helper is defined here so that we don't have to expose
/// `assemble_pass1` or `assemble_pass2`.
///
/// Runs passes 1 and 2 over `input` and returns the pieces of the
/// pass 2 output.
///
/// # Panics
///
/// Panics if either pass fails, if any diagnostics were produced, or
/// if no directive was generated.
#[cfg(test)]
pub(crate) fn assemble_nonempty_valid_input(
    input: &str,
) -> (
    Directive,
    ExplicitSymbolTable,
    ImplicitSymbolTable,
    MemoryMap,
    IndexRegisterAssigner,
) {
    let input_source = Source::new(input);
    let (errors, p2output) = match assemble_nonempty_input(&input_source) {
        AssemblerPass1Or2Output::Success(errors, _output_options, p2output) => (errors, p2output),
        AssemblerPass1Or2Output::Pass1Failed(Ok(errors)) => {
            panic!("pass 1 failed with diagnostics: {errors:?}");
        }
        AssemblerPass1Or2Output::Pass1Failed(Err(e)) => {
            panic!("pass 1 failed with an error result: {e}");
        }
        AssemblerPass1Or2Output::Pass2Failed(e) => {
            panic!("pass 2 failed with an error result: {e}");
        }
    };
    assert!(
        errors.is_empty(),
        "pass 2 failed with diagnostics: {errors:?}"
    );

    let Pass2Output {
        directive,
        explicit_symbols,
        implicit_symbols,
        memory_map,
        index_register_assigner,
        errors,
    } = p2output;
    let Some(directive) = directive else {
        panic!("directive is None but no errors were reported");
    };
    assert!(errors.is_empty(), "input should be valid: {:?}", &errors);

    (
        directive,
        explicit_symbols,
        implicit_symbols,
        memory_map,
        index_register_assigner,
    )
}
208
/// A contiguous sequence of words at some starting address.
///
/// The derived `Default` value has the default [`Address`] and no
/// words.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BinaryChunk {
    /// Starting address
    pub address: Address,
    /// The words in the chunk.
    pub words: Vec<Unsigned36Bit>,
}
217
218impl BinaryChunk {
219    #[must_use]
220    pub fn is_empty(&self) -> bool {
221        self.words.is_empty()
222    }
223
224    fn count_words(&self) -> usize {
225        self.words.len()
226    }
227
228    pub fn push(&mut self, w: Unsigned36Bit) {
229        self.words.push(w);
230    }
231}
232
233impl From<RcBlock> for BinaryChunk {
234    fn from(block: RcBlock) -> Self {
235        BinaryChunk {
236            address: block.address,
237            words: block
238                .words
239                .into_iter()
240                .map(|(_source, word)| word)
241                .collect(),
242        }
243    }
244}
245
/// The assembled program; a sequence of [`BinaryChunk`] instances
/// with an optional entry point.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Binary {
    /// The entry point, if one has been set.
    entry_point: Option<Address>,
    /// The chunks of the program, in the order in which they were added.
    chunks: Vec<BinaryChunk>,
}
253
254impl Binary {
255    fn count_words(&self) -> usize {
256        self.chunks().iter().map(BinaryChunk::count_words).sum()
257    }
258
259    fn entry_point(&self) -> Option<Address> {
260        self.entry_point
261    }
262
263    fn set_entry_point(&mut self, address: Address) {
264        self.entry_point = Some(address);
265    }
266
267    pub fn add_chunk(&mut self, chunk: BinaryChunk) {
268        self.chunks.push(chunk);
269    }
270
271    fn chunks(&self) -> &[BinaryChunk] {
272        &self.chunks
273    }
274
275    fn is_empty(&self) -> bool {
276        self.chunks.is_empty()
277    }
278}
279
/// Output of pass 2 of the assembler.
#[derive(Debug, PartialEq, Eq)]
struct Pass2Output<'a> {
    /// An abstract representation of the source code.
    directive: Option<Directive>,
    /// Explicit symbol definitions ("equalities")
    explicit_symbols: ExplicitSymbolTable,
    /// Information about symbols which are known but lack a definition.
    implicit_symbols: ImplicitSymbolTable,
    /// Location of the blocks of the program.
    memory_map: MemoryMap,
    /// Provides for default-assignment of symbols used only in an
    /// index context.
    index_register_assigner: IndexRegisterAssigner, // not cloneable
    /// Syntax or semantic errors diagnosed so far.  We use this
    /// instead of `Result<T,E>` so that we can diagnose more than one
    /// error.
    errors: Vec<Rich<'a, lexer::Token>>,
}
299
300fn initial_symbol_table<'a>(
301    source_file: &SourceFile,
302) -> Result<(ExplicitSymbolTable, ImplicitSymbolTable), OneOrMore<Rich<'a, lexer::Token>>> {
303    let mut errors = Vec::new();
304    // TODO: split these out into separate functions.
305    let mut explicit_symbols = ExplicitSymbolTable::new();
306    // All explicit definitions in the program take effect either
307    // locally (for the bodies of macro expansions) or globally (for
308    // everything else).  So, before we can enumerate all global
309    // symbol references, we need to identidy which symbol references
310    // are references to something defined in a local scope.  And so
311    // we need to enumerate definitions before references.
312    for r in source_file.global_symbol_definitions() {
313        match r {
314            Ok((symbol, span, definition)) => {
315                match explicit_symbols.define(symbol.clone(), definition.clone()) {
316                    Ok(()) => (),
317                    Err(e) => {
318                        errors.push(Rich::custom(
319                            span,
320                            format!("bad symbol definition for {symbol}: {e}"),
321                        ));
322                    }
323                }
324            }
325            Err(e) => {
326                let span: Span = e.span();
327                errors.push(Rich::custom(span, e.to_string()));
328            }
329        }
330    }
331    let mut implicit_symbols = ImplicitSymbolTable::default();
332    for r in source_file.global_symbol_references() {
333        match r {
334            Ok((symbol, span, context)) => {
335                if !explicit_symbols.is_defined(&symbol)
336                    && let Err(e) = implicit_symbols.record_usage_context(&symbol, &context)
337                {
338                    errors.push(Rich::custom(span, e.to_string()));
339                }
340            }
341            Err(e) => {
342                let span: Span = e.span();
343                errors.push(Rich::custom(span, e.to_string()));
344            }
345        }
346    }
347    match OneOrMore::try_from_vec(errors) {
348        Ok(errors) => Err(errors),
349        Err(_) => Ok((explicit_symbols, implicit_symbols)),
350    }
351}
352
353/// Pass 2 converts the abstract syntax representation into a
354/// `Directive`, which is closer to binary code.
355///
356/// The `source_file` input is essentially an abstract syntax
357/// representation.  The output is a symbol table and a "directive"
358/// which is a sequence of blocks of code of known position and size
359/// (but the contents of which are not yet populated).
360fn assemble_pass2<'s>(
361    source_file: SourceFile,
362    source_file_body: &Source<'s>,
363) -> Result<Pass2Output<'s>, AssemblerFailure> {
364    let span = span!(Level::ERROR, "assembly pass 2");
365    let _enter = span.enter();
366
367    let mut memory_map = MemoryMap::new(source_file.blocks.iter().map(|block| {
368        let span: Span = block.origin_span();
369        (span, block.origin.clone(), block.instruction_count())
370    }));
371
372    let (mut explicit_symbols, mut implicit_symbols) = match initial_symbol_table(&source_file) {
373        Ok(syms) => syms,
374        Err(errors) => {
375            return Err(AssemblerFailure::BadProgram(fail_with_diagnostics(
376                source_file_body,
377                errors,
378            )));
379        }
380    };
381    let mut index_register_assigner: IndexRegisterAssigner = IndexRegisterAssigner::default();
382    let mut no_rc_allocation = NoRcBlock {
383        why_blocked: "we don't expect origin computation to require RC-word allocation",
384    };
385    let tmp_blocks: Vec<BlockPosition> = memory_map.iter().cloned().collect();
386    for block_position in tmp_blocks {
387        let mut ctx = EvaluationContext {
388            explicit_symtab: &mut explicit_symbols,
389            implicit_symtab: &mut implicit_symbols,
390            memory_map: &memory_map,
391            here: HereValue::NotAllowed,
392            index_register_assigner: &mut index_register_assigner,
393            rc_updater: &mut no_rc_allocation,
394            lookup_operation: Default::default(),
395        };
396        let scope = ScopeIdentifier::global();
397        match block_position.evaluate(&mut ctx, scope) {
398            Ok(value) => {
399                if !ctx.index_register_assigner.is_empty() {
400                    return Err(AssemblerFailure::InternalError(format!(
401                        "While determining the addresses of {0}, we assigned an index register.  Block origins should not depend on index registers",
402                        &block_position.block_identifier
403                    )));
404                }
405
406                let address: Address = subword::right_half(value).into();
407                memory_map.set_block_position(block_position.block_identifier, address);
408            }
409            Err(e) => {
410                let prog_error: ProgramError = e.into_program_error();
411                return Err(prog_error.into_assembler_failure(source_file_body));
412            }
413        }
414    }
415
416    let directive = source_file.into_directive(&memory_map);
417    if let Some(instruction_count) = directive
418        .blocks
419        .values()
420        .try_fold(Unsigned18Bit::ZERO, |acc, b| {
421            acc.checked_add(b.emitted_word_count())
422        })
423    {
424        event!(
425            Level::INFO,
426            "assembly pass 2 generated {instruction_count} instructions"
427        );
428    }
429    Ok(Pass2Output {
430        directive: Some(directive),
431        explicit_symbols,
432        implicit_symbols,
433        memory_map,
434        index_register_assigner,
435        errors: Vec::new(),
436    })
437}
438
/// Placeholder for the RC-block which will not allocate words.
///
/// The job of this struct is to prevent allocation of words in the
/// RC-block while we are still trying to compute the origin of each
/// block of the program.
struct NoRcBlock {
    /// Explanation included in the panic message if an allocation or
    /// update is attempted anyway.
    why_blocked: &'static str,
}
447
impl RcAllocator for NoRcBlock {
    /// Never allocates; reaching this method is treated as a bug.
    ///
    /// # Panics
    ///
    /// Always panics, with a message that includes `why_blocked`.
    fn allocate(
        &mut self,
        _source: RcWordSource,
        _value: Unsigned36Bit,
    ) -> Result<Address, RcWordAllocationFailure> {
        panic!(
            "Cannot allocate an RC-word before we know the address of the RC block: {}",
            self.why_blocked
        );
    }
}
460
impl RcUpdater for NoRcBlock {
    /// Never updates anything; reaching this method is treated as a bug.
    ///
    /// # Panics
    ///
    /// Always panics, with a message that includes `why_blocked`.
    fn update(&mut self, _address: Address, _value: Unsigned36Bit) {
        panic!(
            "Cannot update an RC-word in an RC-block which cannot allocate words: {}",
            self.why_blocked
        );
    }
}
469
/// Pass 3 generates binary code.
///
/// The steps, in order, are:
///
/// 1. Record the entry point (if any) and create an empty RC-block
///    at the address given by `directive.position_rc_block()`.
/// 2. Enter symbolic block origins which have no explicit definition
///    into the final symbol table.
/// 3. Extract the final equalities.
/// 4. Allocate RC-words for every block, then assign default RC-word
///    tags.
/// 5. Build the binary words of each block, adding lines to
///    `listing` as we go; non-empty blocks become chunks of the
///    output.
/// 6. Add the RC-word block (if non-empty) to the listing and to the
///    output.
///
/// # Errors
///
/// - Failures from `extract_final_equalities` and
///   `build_binary_block` are returned unchanged.
/// - RC-word allocation failures are reported as
///   [`AssemblerFailure::BadProgram`].
/// - If any bad symbol definitions were accumulated along the way,
///   they are all reported as [`AssemblerFailure::BadProgram`].
///
/// # Panics
///
/// Panics if a symbol appears in both the implicit and the explicit
/// symbol tables, if `ImplicitSymbolTable::symbols()` yields a name
/// which is in neither table, or if an index into the RC-block
/// cannot be converted to `Unsigned18Bit`.
fn assemble_pass3(
    mut directive: Directive,
    explicit_symtab: &mut ExplicitSymbolTable,
    implicit_symtab: &mut ImplicitSymbolTable,
    memory_map: &mut MemoryMap,
    index_register_assigner: &mut IndexRegisterAssigner,
    body: &Source,
    listing: &mut Listing,
) -> Result<(Binary, FinalSymbolTable), AssemblerFailure> {
    let span = span!(Level::ERROR, "assembly pass 3");
    let _enter = span.enter();
    let mut binary = Binary::default();
    if let Some(address) = directive.entry_point() {
        binary.set_entry_point(address);
    }

    let mut rcblock = RcBlock {
        address: directive.position_rc_block(),
        words: Vec::new(),
    };

    // Take the directive apart; the entry point has already been
    // copied into `binary` above.
    let Directive {
        mut blocks,
        equalities,
        entry_point: _,
    } = directive;

    // TODO: we should be able to convert implicit_symtab into
    // final_symbols (and drop implicit_symtab).
    let mut final_symbols = FinalSymbolTable::default();
    let mut bad_symbol_definitions: BTreeMap<SymbolName, ProgramError> = Default::default();
    // TODO: consider moving this into pass 2.
    for block in blocks.values() {
        if let Some(Origin::Symbolic(span, symbol_name)) = block.origin.as_ref()
            && !explicit_symtab.is_defined(symbol_name)
        {
            final_symbols.define_if_undefined(
                symbol_name.clone(),
                FinalSymbolType::Tag, // actually origin
                body.extract(span.start..span.end).to_string(),
                FinalSymbolDefinition::PositionIndependent(block.location.into()),
            );
        }
    }

    // We call extract_final_equalities here to ensure that we
    // diagnose all looping definitions of equalities and get the data
    // we need for the listing, if there will be one.  It isn't
    // actually needed to generate the output binary.
    extract_final_equalities(
        equalities.as_slice(),
        body,
        explicit_symtab,
        implicit_symtab,
        memory_map,
        index_register_assigner,
        &mut rcblock,
        &mut final_symbols,
        &mut bad_symbol_definitions,
    )?;

    // Converts an RC-word allocation failure into a `BadProgram`
    // failure carrying the source location of the problem.
    let convert_rc_failure = |e: RcWordAllocationFailure| -> AssemblerFailure {
        match e {
            RcWordAllocationFailure::RcBlockTooBig { source, .. } => {
                let span: Span = source.span();
                let location: LineAndColumn = body.location_of(span.start);
                AssemblerFailure::BadProgram(OneOrMore::new(WithLocation {
                    location,
                    inner: ProgramError::RcBlockTooLong(source),
                }))
            }
            ref e @ RcWordAllocationFailure::InconsistentTag {
                ref tag_name,
                span,
                explanation: _,
            } => {
                let location: LineAndColumn = body.location_of(span.start);
                AssemblerFailure::BadProgram(OneOrMore::new(WithLocation {
                    location,
                    inner: ProgramError::InconsistentTag {
                        name: tag_name.clone(),
                        span,
                        msg: e.to_string(),
                    },
                }))
            }
        }
    };

    for directive_block in blocks.values_mut() {
        if let Err(e) =
            directive_block.allocate_rc_words(explicit_symtab, implicit_symtab, &mut rcblock)
        {
            return Err(convert_rc_failure(e));
        }
    }

    // Now that RC-word allocation is complete, we no longer need to
    // mutate the explicit symbol table.
    let explicit_symtab: &ExplicitSymbolTable = &*explicit_symtab;

    // Sanity check: every implicit symbol must be in exactly one of
    // the two symbol tables.
    for name in implicit_symtab.symbols() {
        match (implicit_symtab.get(name), explicit_symtab.get(name)) {
            (Some(implicit), Some(explicit)) => {
                panic!(
                    "symbol {name} appears in both the implicit ({implicit:#?} and the explicit ({explicit:#?} symbol tables"
                );
            }
            (Some(_), None) | (None, Some(_)) => (),
            (None, None) => {
                panic!(
                    "symbol {name} is returned by ImplicitSymbolTable::symbols() but is not defined there"
                );
            }
        }
    }

    if let Err(e) = assign_default_rc_word_tags(implicit_symtab, &mut rcblock, &mut final_symbols) {
        return Err(convert_rc_failure(e));
    }

    // Emit the binary code.
    for (block_id, directive_block) in blocks {
        event!(
            Level::DEBUG,
            "{block_id} in output has address {0:#o} and length {1:#o}",
            directive_block.location,
            directive_block.emitted_word_count(),
        );
        if let Some(origin) = directive_block.origin.clone() {
            // This is an origin (Users Handbook section 6-2.5) not a
            // tag (6-2.2).
            let span = origin.span();
            listing.push_line(ListingLine {
                span: Some(span),
                rc_source: None,
                content: None,
            });
        }

        let words = directive_block.build_binary_block(
            directive_block.location,
            explicit_symtab,
            implicit_symtab,
            memory_map,
            index_register_assigner,
            &mut rcblock,
            &mut final_symbols,
            body,
            listing,
            &mut bad_symbol_definitions,
        )?;
        if words.is_empty() {
            event!(
                Level::DEBUG,
                "{block_id} will not be included in the output because it is empty"
            );
        } else {
            binary.add_chunk(BinaryChunk {
                address: directive_block.location,
                words,
            });
        }
    }

    // If the RC-word block is non-empty, emit it.
    if !rcblock.words.is_empty() {
        for (i, (rc_source, word)) in rcblock.words.iter().enumerate() {
            // The address of each RC-word is its offset from the
            // start of the RC-block.
            let address = rcblock.address.index_by(
                Unsigned18Bit::try_from(i)
                    .expect("RC block size should be limite to physical address space"),
            );

            listing.push_rc_line(ListingLine {
                span: None,
                rc_source: Some(rc_source.clone()),
                content: Some((address, *word)),
            });
        }
    }
    let chunk: BinaryChunk = rcblock.into();
    if chunk.is_empty() {
        event!(
            Level::DEBUG,
            "The RC-word block is empty, and so it will not be emitted.",
        );
    } else {
        event!(
            Level::DEBUG,
            "Emitting RC-word block of length {:#o} words at address {:#o}.",
            chunk.words.len(),
            &chunk.address,
        );
        binary.add_chunk(chunk);
    }

    // Any bad symbol definition accumulated above makes the whole
    // pass fail, even though the binary has been built by now.
    match OneOrMore::try_from_iter(bad_symbol_definitions.into_values().map(|program_error| {
        let span = program_error.span();
        let location = body.location_of(span.start);
        WithLocation {
            location,
            inner: program_error,
        }
    })) {
        Ok(errors) => Err(AssemblerFailure::BadProgram(errors)),
        Err(_) => Ok((binary, final_symbols)),
    }
}
679
/// Return `input` with every backslash and every [control
/// character](char::is_control) replaced by its
/// [`char::escape_default`] form.  All other characters (including
/// quotes) are passed through untouched.
///
/// When nothing needs escaping, the input `String` is handed back
/// as-is, without copying.
fn cleanup_control_chars(input: String) -> String {
    let must_quote = |c: char| c.is_control() || c == '\\';

    let quoted_count = input.chars().filter(|&c| must_quote(c)).count();
    if quoted_count == 0 {
        // No need to escape anything, return the input unchanged.
        return input;
    }

    // Each escaped character grows by at least one byte, so this is
    // a lower bound on the size of the result.
    let mut escaped = String::with_capacity(input.len().saturating_add(quoted_count));
    for c in input.chars() {
        match c {
            special if must_quote(special) => escaped.extend(special.escape_default()),
            ordinary => escaped.push(ordinary),
        }
    }
    escaped
}
702
/// Strings without control characters or backslashes come back unchanged.
#[test]
fn test_cleanup_control_chars_not_control() {
    for plain in ["", "hello", "12", "X Y"] {
        let cleaned = cleanup_control_chars(plain.to_string());
        assert_eq!(cleaned, plain.to_string());
    }
}
711
/// Single and double quotes are not escaped.
#[test]
fn test_cleanup_control_chars_quotes() {
    let quoted = ["They're", "\"hello\"", "\"\"", "''", "Un\"balanced"];
    for text in quoted {
        let cleaned = cleanup_control_chars(String::from(text));
        assert_eq!(cleaned, String::from(text));
    }
}
720
/// Newlines and backslashes are replaced by their escaped forms.
#[test]
fn test_cleanup_control_chars_backslash() {
    let cases = [
        ("First line\nsecond line", "First line\\nsecond line"),
        ("\\", "\\\\"),
    ];
    for (input, expected) in cases {
        assert_eq!(
            cleanup_control_chars(String::from(input)),
            String::from(expected)
        );
    }
}
732
733/// Convert raw parse errors into instances of `ProgramError`.
734fn fail_with_diagnostics(
735    source_file_body: &Source,
736    errors: OneOrMore<Rich<lexer::Token>>,
737) -> OneOrMore<WithLocation<ProgramError>> {
738    errors.into_map(|e| {
739        let span: Span = *e.span();
740        WithLocation {
741            location: source_file_body.location_of(span.start),
742            inner: ProgramError::SyntaxError {
743                span,
744                msg: cleanup_control_chars(e.to_string()),
745            },
746        }
747    })
748}
749
750/// Run all the passes of assembly, generating an output-ready
751/// [`Binary`].
752///
753/// # Arguments
754///
755/// - `source_file_body` - the contents (body) of the source file
756/// - `options` - what kind of output the user wants
757///
758/// # Errors
759///
760/// - [`AssemblerFailure::BadTapeBlock`] - output program block is too big
761/// - [`AssemblerFailure::BadProgram`] - Syntax or semantic error
762/// - [`AssemblerFailure::MachineLimitExceeded`] - Program is too large in some way
763/// - [`AssemblerFailure::InternalError`] - Encountered a bug in the parser
764pub(crate) fn assemble_source(
765    source_file_body: &str,
766    mut options: OutputOptions,
767) -> Result<Binary, AssemblerFailure> {
768    let source_file_body = Source::new(source_file_body);
769    let mut p2output = match assemble_nonempty_input(&source_file_body) {
770        AssemblerPass1Or2Output::Pass1Failed(Ok(errors)) => {
771            return Err(AssemblerFailure::BadProgram(fail_with_diagnostics(
772                &source_file_body,
773                errors,
774            )));
775        }
776        AssemblerPass1Or2Output::Pass1Failed(Err(e)) | AssemblerPass1Or2Output::Pass2Failed(e) => {
777            return Err(e);
778        }
779        AssemblerPass1Or2Output::Success(
780            errors,
781            _output_options,
782            Pass2Output {
783                directive: None, ..
784            },
785        ) if errors.is_empty() => {
786            panic!("assembly pass1 generated no errors, a directive should have been returned");
787        }
788        AssemblerPass1Or2Output::Success(errors, output_options, p2output) => {
789            if let Ok(errors) = OneOrMore::try_from_vec(errors) {
790                return Err(AssemblerFailure::BadProgram(fail_with_diagnostics(
791                    &source_file_body,
792                    errors,
793                )));
794            }
795            // No errors.
796            options = options.merge(output_options);
797            p2output
798        }
799    };
800
801    // Now we do pass 3, which generates the binary output
802    let binary = {
803        let mut listing = Listing::default();
804        let (binary, final_symbols) = assemble_pass3(
805            p2output
806                .directive
807                .expect("directive should have already been checked for None-ness"),
808            &mut p2output.explicit_symbols,
809            &mut p2output.implicit_symbols,
810            &mut p2output.memory_map,
811            &mut p2output.index_register_assigner,
812            &source_file_body,
813            &mut listing,
814        )?;
815
816        listing.set_final_symbols(final_symbols);
817        if options.list {
818            println!(
819                "{0}",
820                ListingWithBody {
821                    listing: &listing,
822                    body: &source_file_body,
823                }
824            );
825        }
826        binary
827    };
828
829    event!(
830        Level::INFO,
831        "assembly pass 3 generated {} words of binary output (not counting the reader leader)",
832        binary.count_words()
833    );
834    Ok(binary)
835}
836
/// Test helper: wrap `atom` in an arithmetic expression and return
/// that as an instruction fragment.
#[cfg(test)]
fn atom_to_fragment(atom: Atom) -> InstructionFragment {
    let expression: ArithmeticExpression = atom.into();
    InstructionFragment::Arithmetic(expression)
}
841
/// Pass 1 over a one-instruction program with a PUNCH meta command
/// yields the expected source file, default output options and no
/// errors.
#[test]
fn test_assemble_pass1() {
    let input = concat!("14\n", "☛☛PUNCH 26\n");

    // The single instruction: the literal 14 (octal) at offsets 0..2.
    let literal = LiteralValue::from((span(0..2), Script::Normal, u36!(0o14)));
    let fragment = CommaDelimitedFragment {
        leading_commas: None,
        holdbit: HoldBit::Unspecified,
        span: span(0..2),
        fragment: atom_to_fragment(Atom::from(literal)),
        trailing_commas: None,
    };
    let instruction = TaggedProgramInstruction {
        span: span(0..2),
        tags: Vec::new(),
        instruction: UntaggedProgramInstruction::from(OneOrMore::new(fragment)),
    };
    let expected_block = ManuscriptBlock {
        origin: None,
        sequences: vec![InstructionSequence {
            local_symbols: None,
            instructions: vec![instruction],
        }],
    };
    let expected_directive_entry_point = Some(Address::new(Unsigned18Bit::from(0o26_u8)));
    let expected_source_file = SourceFile {
        punch: Some(PunchCommand(expected_directive_entry_point)),
        blocks: vec![expected_block],
        global_equalities: Default::default(), // no equalities
        macros: Default::default(),
    };

    let mut errors = Vec::new();
    let input_source = Source::new(input);
    let (source_file, options) = assemble_pass1(&input_source, &mut errors);
    assert_eq!(source_file, Some(expected_source_file));
    assert_eq!(options, OutputOptions { list: false });
    assert!(errors.is_empty());
}
886
887/// Assemble input file, producing a tape image.
888///
889/// # Arguments
890///
891/// - `input_file_name` - name of the source code file
892/// - `outputoptions` - where to write the tape image
893/// - `options` - what kind of output the user wants (in
894///   addition to the tape image)
895///
896/// # Errors
897///
898/// - `AssemblerFailure::Io` - failed to read input / write output
899/// - [`AssemblerFailure::BadTapeBlock`] - output program block is too big
900/// - [`AssemblerFailure::BadProgram`] - Syntax or semantic error
901/// - [`AssemblerFailure::MachineLimitExceeded`] - Program is too large in some way
902/// - [`AssemblerFailure::InternalError`] - Encountered a bug in the parser
903///    -
904pub fn assemble_file(
905    input_file_name: &OsStr,
906    output_file_name: &Path,
907    options: OutputOptions,
908) -> Result<(), AssemblerFailure> {
909    let input_file = OpenOptions::new()
910        .read(true)
911        .open(input_file_name)
912        .map_err(|e| {
913            AssemblerFailure::Io(IoFailed {
914                action: IoAction::Read,
915                target: IoTarget::File(PathBuf::from(input_file_name)),
916                error: e,
917            })
918        })?;
919
920    let source_file_body: String = {
921        let mut body = String::new();
922        match BufReader::new(input_file).read_to_string(&mut body) {
923            Err(e) => {
924                return Err(AssemblerFailure::Io(IoFailed {
925                    action: IoAction::Read,
926                    target: IoTarget::File(PathBuf::from(input_file_name)),
927                    error: e,
928                }));
929            }
930            Ok(_) => body,
931        }
932    };
933
934    let user_program: Binary = assemble_source(&source_file_body, options)?;
935
936    // The Users Guide explains on page 6-23 how the punched binary
937    // is created (and read back in).
938    let output_file = OpenOptions::new()
939        .create(true)
940        .write(true)
941        .truncate(true)
942        .open(output_file_name)
943        .map_err(|e| {
944            AssemblerFailure::Io(IoFailed {
945                action: IoAction::Write,
946                target: IoTarget::File(PathBuf::from(output_file_name)),
947                error: e,
948            })
949        })?;
950    let mut writer = BufWriter::new(output_file);
951    write_user_program(&user_program, &mut writer, output_file_name)
952}
953
/// Defining the global tag TGX twice must make pass 2 fail with a
/// syntax error saying that the tag is defined more than once.
#[test]
fn test_duplicate_global_tag() {
    let input = concat!("TGX->0\n", "TGX->0\n");
    let input_source = Source::new(input);

    // Anything other than a pass 2 "bad program" failure means the
    // test has failed; otherwise keep the reported errors.
    let errors = match assemble_nonempty_input(&input_source) {
        AssemblerPass1Or2Output::Pass2Failed(AssemblerFailure::BadProgram(errors)) => errors,
        AssemblerPass1Or2Output::Success(errors, _output_options, p2output) => {
            dbg!(&errors);
            dbg!(&p2output);
            panic!("assembler unexpectedly succeeded with a bad input {input}");
        }
        unexpected => {
            panic!("assembly failed, but not in the way expected by this test: {unexpected:?}");
        }
    };
    dbg!(&errors);

    // Only the first reported error is examined.
    match errors.first() {
        WithLocation {
            inner: ProgramError::SyntaxError { msg, .. },
            ..
        } => {
            assert!(
                msg.contains("bad symbol definition for TGX: TGX is defined more than once")
            );
        }
        other => {
            panic!("expected a syntax error report, got {other:?}");
        }
    }
}