Source code for ducky.cpu.assemble

import collections
import ctypes
import functools
import mmap
import os.path
import re

from six import iteritems, itervalues, integer_types, string_types, PY2

from .. import cpu
from .. import mm
from ..cpu.coprocessor.math_copro import MathCoprocessorInstructionSet  # noqa - it's not unused, SIS instruction may need it but that's hidden from flake
from ..cpu.instructions import encoding_to_u32

from ..mm import u8_t, PAGE_SIZE, UINT32_FMT
from ..mm.binary import SectionTypes, SectionFlags, SymbolFlags, RelocFlags
from ..util import align, str2bytes
from ..errors import AssemblerError, IncompleteDirectiveError, UnknownFileError, DisassembleMismatchError, UnknownPatternError, TooManyLabelsError

# ``align`` with PAGE_SIZE (imported from ``..mm``) bound as its first
# positional argument.
# NOTE(review): presumably ``align(boundary, n)`` rounds ``n`` up to the next
# multiple of ``boundary`` - confirm against ``..util.align``.
align_to_next_page = functools.partial(align, PAGE_SIZE)

# Same helper, bound to the host's ``mmap.PAGESIZE`` instead of PAGE_SIZE.
align_to_next_mmap = functools.partial(align, mmap.PAGESIZE)

def PATTERN(pattern):
    """
    Compile a line-level regular expression around ``pattern``.

    The compiled expression allows leading whitespace, captures the text
    matched by ``pattern`` in a group named ``payload``, and tolerates an
    optional trailing comment introduced by ``;`` or ``#``. It is compiled
    with ``re.MULTILINE``.

    :param pattern: regular expression source matching the payload.
    :returns: compiled regular expression object.
    """

    prefix = r'^\s*(?P<payload>'
    suffix = r')(?:\s*[;#].*)?$'

    return re.compile(prefix + pattern + suffix, re.MULTILINE)
# Regular expressions used to recognize assembler directives and literals.
#
# Expressions built by PATTERN() allow leading whitespace and an optional
# trailing ``;``/``#`` comment, and expose the matched directive in the
# ``payload`` group.

# Operand of .byte/.short/.int: hex or decimal literal, variable, or &label.
# The leading ``\s+`` is required because the directive patterns capture
# everything after the keyword, including the separating whitespace.
RE_INTEGER = re.compile(r'^\s+(?:(?P<value_hex>-?0x[a-fA-F0-9]+)|(?P<value_dec>0|(?:-?[1-9][0-9]*))|(?P<value_var>[a-zA-Z][a-zA-Z0-9_]*(?:.*?)?)|(?P<value_label>&[a-zA-Z_\.][a-zA-Z0-9_\.]*))\s*$')

# Double-quoted string literal.
RE_STR = re.compile(r'^\s*"(?P<string>.*?)"\s*$')

# Whole-line comment starting with "/" or ";".
RE_COMMENT = re.compile(r'^\s*[/;].*?$', re.MULTILINE)

# Preprocessor-like directives.
RE_INCLUDE = PATTERN(r'\.include\s+"(?P<file>[a-zA-Z0-9_\-/\.]+)\s*"')
# The leading dot is escaped (it used to match any character), and there is
# no "$" inside the payload, so a trailing comment is accepted exactly as it
# is for .ifndef.
RE_IFDEF = PATTERN(r'\.ifdef\s+(?P<var>[a-zA-Z0-9_]+)')
RE_IFNDEF = PATTERN(r'\.ifndef\s+(?P<var>[a-zA-Z0-9_]+)')
RE_ELSE = PATTERN(r'\.else')
RE_ENDIF = PATTERN(r'\.endif')
RE_VAR_DEF = PATTERN(r'\.def\s+(?P<var_name>[a-zA-Z][a-zA-Z0-9_]*):\s*(?P<var_body>.*?)')
RE_MACRO_DEF = re.compile(r'^\s*\.macro\s+(?P<macro_name>[a-zA-Z_][a-zA-Z0-9_]*)(?:\s+(?P<macro_params>.*?))?:$', re.MULTILINE | re.DOTALL)
RE_MACRO_END = PATTERN(r'\.end')

# Data, section and symbol directives.
RE_ASCII = PATTERN(r'\.ascii\s+(?P<string>".*?")')
RE_BYTE = PATTERN(r'\.byte(?P<integer>.*?)')
RE_DATA = PATTERN(r'\.data(?:\s+(?P<name>\.[a-z][a-z0-9_]*))?')
RE_INT = PATTERN(r'\.int(?P<integer>.*?)')
RE_LABEL = PATTERN(r'(?P<label>[a-zA-Z_\.][a-zA-Z0-9_\.]*):')
# Character class is [a-zA-Z0-9_]; the former "A-z" range also accepted
# "[", "\", "]", "^" and "`" in section names.
RE_SECTION = PATTERN(r'\.section\s+(?P<name>\.[a-zA-Z0-9_]+)(?:,\s*(?P<flags>[rwxlbmg]*))?')
RE_SET = PATTERN(r'\.set\s+(?P<name>[a-zA-Z_][a-zA-Z0-9_]*),\s*(?:(?P<current>\.)|(?P<value_hex>-?0x[a-fA-F0-9]+)|(?P<value_dec>0|(?:-?[1-9][0-9]*))|(?P<value_label>&[a-zA-Z][a-zA-Z0-9_]*))')
RE_SHORT = PATTERN(r'\.short(?P<integer>.*?)')
RE_SIZE = PATTERN(r'\.size\s+(?P<size>[1-9][0-9]*)')
RE_SPACE = PATTERN(r'\.space\s+(?P<size>[1-9][0-9]*)')
RE_STRING = PATTERN(r'\.string(?P<string>.*?)')
RE_TEXT = PATTERN(r'\.text(?:\s+(?P<name>\.[a-z][a-z0-9_]*))?')
RE_TYPE = PATTERN(r'\.type\s+(?P<name>[a-zA-Z_\.][a-zA-Z0-9_]*),\s*(?P<type>(?:char|byte|short|int|ascii|string|space))')
RE_GLOBAL = PATTERN(r'\.global\s+(?P<name>[a-zA-Z_][a-zA-Z0-9_]*)')
RE_ALIGN = PATTERN(r'\.align\s+(?P<boundary>[0-9]+)')
class SourceLocation(object):
    """
    Position inside a source: file name and line number, plus an optional
    column and length.
    """

    def __init__(self, filename = None, lineno = None, column = None, length = None):
        self.filename, self.lineno = filename, lineno
        self.column, self.length = column, length

    def copy(self):
        """
        Return a new ``SourceLocation`` carrying the same four fields.
        """

        return SourceLocation(self.filename, self.lineno, self.column, self.length)

    def __str__(self):
        # "<filename>:<lineno>", followed by ":<column>" only when a column is set.
        text = ':'.join([self.filename, str(self.lineno)])

        if self.column is None:
            return text

        return text + ':' + str(self.column)

    def __repr__(self):
        return self.__str__()
class Buffer(object):
    """
    Queue of input lines consumed by the assembler.

    ``buff`` is a list whose items are either source lines or
    ``SourceLocation`` markers; reaching a marker switches the current
    location instead of yielding a line.
    """

    def __init__(self, logger, filename, buff):
        super(Buffer, self).__init__()

        self.logger = logger

        # Shortcuts to the logger's methods.
        self.DEBUG, self.INFO = logger.debug, logger.info
        self.WARN, self.ERROR = logger.warning, logger.error
        self.EXCEPTION = logger.exception

        self.buff = buff
        self.location = SourceLocation(filename = filename, lineno = 0)
        self.last_line = None

    def get_line(self):
        """
        Pop and return the next non-empty line, or ``None`` when the buffer
        is exhausted.

        The current line number is incremented for every item taken from the
        buffer. ``SourceLocation`` items replace the current location, and
        empty lines are skipped. ``last_line`` remembers the returned line.
        """

        while self.buff:
            self.location.lineno += 1
            item = self.buff.pop(0)

            if isinstance(item, SourceLocation):
                self.location = item
                self.DEBUG('buffer: file switch: %s', str(self.location))

            elif item:
                self.DEBUG('buffer: new line %s: %s', str(self.location), item)
                self.last_line = item
                return item

        self.last_line = None
        return None

    def put_line(self, line):
        """
        Push ``line`` back to the front of the buffer and step the current
        line number back by one.
        """

        self.buff.insert(0, line)
        self.location.lineno = self.location.lineno - 1

    def put_buffer(self, buff, filename = None):
        """
        Insert another buffer in front of the pending lines.

        The inserted lines are preceded by a fresh location marker for
        ``filename`` (``'<unknown>'`` when not given) and followed by a copy
        of the current location, which is restored once they are consumed.

        :param buff: a string (split on newlines) or a sequence of lines.
        :param filename: name reported for the inserted lines.
        """

        if isinstance(buff, string_types):
            buff = buff.split('\n')

        entering = SourceLocation(filename = filename or '<unknown>', lineno = 0)
        resuming = self.location.copy()

        self.buff[0:0] = [entering] + list(buff) + [resuming]

    def has_lines(self):
        """
        Tell whether any item is still waiting in the buffer.
        """

        return bool(self.buff)

    def get_error(self, cls, info, column = None, length = None, **kwargs):
        """
        Build (but do not raise) an instance of ``cls`` describing a problem
        at the current location.

        :param cls: class to instantiate; called with keyword arguments only.
        :param info: passed to ``cls`` as ``info``.
        :param column: stored on the copied location passed as ``location``.
        :param length: stored on the copied location passed as ``location``.
        :param kwargs: additional keyword arguments for ``cls``; ``line``
          defaults to the last line returned by :py:meth:`get_line`.
        :returns: the new ``cls`` instance.
        """

        where = self.location.copy()
        where.column, where.length = column, length

        kwargs['location'] = where
        kwargs.setdefault('line', self.last_line)
        kwargs['info'] = info

        return cls(**kwargs)
class Section(object):
    """
    Base class of assembler sections: a name, a type, flags and a list of
    content items.

    Three attributes are computed on access:

    * ``data_size`` - sum of ``sizeof()`` over all content items,
    * ``file_size`` - ``data_size`` passed through ``align_to_next_mmap``
      when ``flags.mmapable`` equals 1, plain ``data_size`` otherwise,
    * ``items`` - number of content items.

    :param s_name: section name.
    :param s_type: section type.
    :param s_flags: section flags; must provide ``mmapable`` and
      ``to_string()``, both of which this class uses.
    """

    def __init__(self, s_name, s_type, s_flags):
        super(Section, self).__init__()

        self.name = s_name
        self.type = s_type
        self.flags = s_flags

        self.content = []

        self.base = None
        self.ptr = 0

    def __getattr__(self, name):
        # Called only when regular attribute lookup fails.
        if name == 'data_size':
            return sum(sizeof(i) for i in self.content)

        if name == 'file_size':
            return align_to_next_mmap(self.data_size) if self.flags.mmapable == 1 else self.data_size

        if name == 'items':
            return len(self.content)

        # Without this, every unknown attribute silently evaluated to None,
        # which made hasattr() always true and hid misspelled names.
        raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name))

    def __repr__(self):
        return '<Section: name=%s, type=%s, flags=%s, base=%s, ptr=%s, items=%s, data_size=%s, file_size=%s>' % (self.name, self.type, self.flags.to_string(), UINT32_FMT(self.base), UINT32_FMT(self.ptr), self.items, self.data_size, self.file_size)
class TextSection(Section):
    """
    Section of type ``SectionTypes.TEXT``. When no flags are passed, flags
    are created with ``readable``, ``executable`` and ``loadable`` set.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Additional keyword arguments are accepted and ignored.
        if not flags:
            flags = SectionFlags.create(readable = True, executable = True, loadable = True)

        super(TextSection, self).__init__(s_name, SectionTypes.TEXT, flags)
class RODataSection(Section):
    """
    Section of type ``SectionTypes.DATA``. When no flags are passed, flags
    are created with ``readable`` and ``loadable`` set.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Additional keyword arguments are accepted and ignored.
        if not flags:
            flags = SectionFlags.create(readable = True, loadable = True)

        super(RODataSection, self).__init__(s_name, SectionTypes.DATA, flags)
class DataSection(Section):
    """
    Section of type ``SectionTypes.DATA``. When no flags are passed, flags
    are created with ``readable``, ``writable`` and ``loadable`` set.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Additional keyword arguments are accepted and ignored.
        if not flags:
            flags = SectionFlags.create(readable = True, writable = True, loadable = True)

        super(DataSection, self).__init__(s_name, SectionTypes.DATA, flags)
class BssSection(Section):
    """
    Section of type ``SectionTypes.DATA``. When no flags are passed, flags
    are created with ``readable``, ``writable``, ``loadable`` and ``bss``
    set.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Additional keyword arguments are accepted and ignored.
        if not flags:
            flags = SectionFlags.create(readable = True, writable = True, loadable = True, bss = True)

        super(BssSection, self).__init__(s_name, SectionTypes.DATA, flags)
class SymbolsSection(Section):
    """
    Section of type ``SectionTypes.SYMBOLS``, always created with the flags
    returned by ``SectionFlags.create()`` called without arguments.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Both ``flags`` and any additional keyword arguments are ignored.
        default_flags = SectionFlags.create()

        super(SymbolsSection, self).__init__(s_name, SectionTypes.SYMBOLS, default_flags)
class RelocSection(Section):
    """
    Section of type ``SectionTypes.RELOC``, always created with the flags
    returned by ``SectionFlags.create()`` called without arguments.
    """

    def __init__(self, s_name, flags = None, **kwargs):
        # Both ``flags`` and any additional keyword arguments are ignored.
        default_flags = SectionFlags.create()

        super(RelocSection, self).__init__(s_name, SectionTypes.RELOC, default_flags)
class Label(object):
    """
    Named label, together with the section and source location it was
    created with.
    """

    def __init__(self, name, section, location):
        super(Label, self).__init__()

        self.name, self.section, self.location = name, section, location

    def __repr__(self):
        # A label without a section is reported with section ``None``.
        section_name = self.section.name if self.section else None

        return '<label {} in section {} ({})>'.format(self.name, section_name, str(self.location))
class Reference(object):
    """
    Reference to a label, with an addend.

    :param add: addend; any false value is stored as ``0``.
    :param label: referenced label.
    """

    def __init__(self, add = None, label = None):
        self.add = add if add else 0
        self.label = label

    def __repr__(self):
        fields = (self.label, self.add)

        return '<Reference: label=%s, add=%s>' % fields
class RelocSlot(object):
    """
    Relocation record: a name, flags and the ``patch_*`` values it was
    created with. ``size`` is always initialized to ``0``.

    :param name: name stored on the slot.
    :param flags: relocation flags; when false, ``RelocFlags.create()``
      supplies them.
    """

    def __init__(self, name, flags = None, patch_section = None, patch_address = None, patch_offset = None, patch_size = None, patch_add = None):
        super(RelocSlot, self).__init__()

        self.name = name
        self.flags = flags if flags else RelocFlags.create()

        self.patch_section, self.patch_address = patch_section, patch_address
        self.patch_offset, self.patch_size = patch_offset, patch_size
        self.patch_add = patch_add

        self.size = 0

    def __repr__(self):
        fields = (
            self.name,
            self.flags.to_string(),
            self.patch_section,
            UINT32_FMT(self.patch_address),
            self.patch_offset,
            self.patch_size,
            self.patch_add
        )

        return '<RelocSlot: name=%s, flags=%s, section=%s, address=%s, offset=%s, size=%s, add=%s>' % fields
class DataSlot(object):
    """
    Base class of data slots. All fields start as ``None`` except ``flags``,
    which is obtained from ``SymbolFlags.create()``.
    """

    def __init__(self):
        super(DataSlot, self).__init__()

        self.name = self.size = self.refers_to = self.value = None
        self.flags = SymbolFlags.create()
        self.section = self.section_ptr = self.location = None

    def close(self):
        """
        Do nothing; subclasses override this to set their size and value.
        """

        return None
class ByteSlot(DataSlot):
    """
    Slot with a size of one byte.
    """

    symbol_type = mm.binary.SymbolDataTypes.CHAR

    def close(self):
        """
        Set size to 1 and, unless the slot refers to something else, replace
        the value (``0`` when unset) with a one-item list holding its
        ``u8_t`` form.
        """

        self.size = 1

        if not self.refers_to:
            self.value = [u8_t(self.value or 0)]

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<ByteSlot: name={}, size={}, section={}, value={}>'.format(self.name, self.size, section_name, self.value)
class ShortSlot(DataSlot):
    """
    Slot with a size of two bytes.
    """

    symbol_type = mm.binary.SymbolDataTypes.SHORT

    def close(self):
        """
        Set size to 2 and, unless the slot refers to something else, replace
        the value (``0`` when unset) with two ``u8_t`` items: the value
        itself, then the value shifted right by 8 bits.
        """

        self.size = 2

        if not self.refers_to:
            number = self.value or 0
            low, high = u8_t(number), u8_t(number >> 8)

            self.value = [low, high]

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<ShortSlot: name={}, size={}, section={}, value={}, refers_to={}>'.format(self.name, self.size, section_name, self.value, self.refers_to)
class IntSlot(DataSlot):
    """
    Slot with a size of four bytes.
    """

    symbol_type = mm.binary.SymbolDataTypes.INT

    def close(self):
        """
        Set size to 4 and, unless the slot refers to something else, replace
        the value (``0`` when unset) with four ``u8_t`` items: the value
        itself, then the value shifted right by 8, 16 and 24 bits.
        """

        self.size = 4

        if not self.refers_to:
            number = self.value or 0

            self.value = [u8_t(number)] + [u8_t(number >> shift) for shift in (8, 16, 24)]

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<IntSlot: name={}, size={}, section={}, value={}, refers_to={}>'.format(self.name, self.size, section_name, self.value, self.refers_to)
class CharSlot(DataSlot):
    """
    Slot holding a single character.
    """

    symbol_type = mm.binary.SymbolDataTypes.CHAR

    def close(self):
        """
        Set size to 1 and replace the value (``'\\0'`` when unset) with the
        ``u8_t`` form of its ordinal.

        Unlike the other slots in this file, the value is stored as a bare
        ``u8_t`` and not wrapped in a list.
        """

        self.size = 1

        character = self.value or '\0'
        self.value = u8_t(ord(character))

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<CharSlot: name={}, section={}, value={}>'.format(self.name, section_name, self.value)
class SpaceSlot(DataSlot):
    """
    Slot that has a size but carries no value.
    """

    symbol_type = mm.binary.SymbolDataTypes.ASCII

    def close(self):
        """
        Drop the value; ``size`` is left exactly as it was set before the
        call.
        """

        self.value = None

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<SpaceSlot: name={}, size={}, section={}>'.format(self.name, self.size, section_name)
class AsciiSlot(DataSlot):
    """
    Slot holding the characters of a string, without a terminator.
    """

    symbol_type = mm.binary.SymbolDataTypes.ASCII

    def close(self):
        """
        Replace the value (empty string when unset) with a list of ``u8_t``
        items, one per character ordinal, and set size to the list's length.
        """

        text = self.value or ''
        encoded = [u8_t(ord(character)) for character in text]

        self.value = encoded
        self.size = len(encoded)

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<AsciiSlot: name={}, size={}, section={}, value={}>'.format(self.name, self.size, section_name, self.value)
class StringSlot(DataSlot):
    """
    Slot holding the characters of a string followed by a zero byte.
    """

    symbol_type = mm.binary.SymbolDataTypes.STRING

    def close(self):
        """
        Replace the value (empty string when unset) with a list of ``u8_t``
        items, one per character ordinal plus a trailing ``u8_t(0)``, and
        set size to the list's length.
        """

        text = self.value or ''

        encoded = [u8_t(ord(character)) for character in text]
        encoded.append(u8_t(0))

        self.value = encoded
        self.size = len(encoded)

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<StringSlot: name={}, size={}, section={}, value={}>'.format(self.name, self.size, section_name, self.value)
class BytesSlot(DataSlot):
    """
    Slot holding a sequence of items that ``u8_t`` accepts directly.
    """

    symbol_type = mm.binary.SymbolDataTypes.ASCII

    def close(self):
        """
        Replace the value (empty string when unset) with a list of ``u8_t``
        items, one per element, and set size to the list's length.
        """

        raw = self.value or ''
        encoded = [u8_t(item) for item in raw]

        self.value = encoded
        self.size = len(encoded)

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<BytesSlot: name={}, size={}, section={}, value={}>'.format(self.name, self.size, section_name, self.value)
class AlignSlot(DataSlot):
    """
    Slot recording an alignment boundary.

    :param boundary: value stored as ``boundary``.
    """

    def __init__(self, boundary):
        super(AlignSlot, self).__init__()

        self.boundary = boundary

    def __repr__(self):
        template = '<AlignSlot: boundary={}>'

        return template.format(self.boundary)
class FunctionSlot(DataSlot):
    """
    Slot representing a function symbol; it has a size of zero.
    """

    symbol_type = mm.binary.SymbolDataTypes.FUNCTION

    def close(self):
        """
        Set size to 0.
        """

        self.size = 0

    def __repr__(self):
        section_name = self.section.name if self.section else ''

        return '<FunctionSlot: name={}, section={}>'.format(self.name, section_name)
def sizeof(o):
    """Return the size of ``o`` as used when advancing section pointers.

    :param o: a ``RelocSlot``, a ``DataSlot``, or any object accepted by
      :py:func:`ctypes.sizeof`.
    :returns: ``0`` for relocation slots, ``o.size`` for other data slots,
      and ``ctypes.sizeof(o)`` for everything else.
    """
    # Relocation slots are tested first so they always report zero.
    if isinstance(o, RelocSlot):
        return 0

    if isinstance(o, DataSlot):
        return o.size

    # ctypes structures and every remaining object are measured by ctypes itself.
    return ctypes.sizeof(o)
if not PY2:
    def decode_string(s):
        """Expand backslash escape sequences in ``s`` (Python 3 variant).

        The string is converted to bytes with ``str2bytes`` and decoded with
        the ``unicode_escape`` codec.
        """
        raw = str2bytes(s)

        return raw.decode('unicode_escape')

else:
    def decode_string(s):
        """Expand backslash escape sequences in ``s`` (Python 2 variant).

        The string is decoded with the ``string_escape`` codec.
        """
        return s.decode('string_escape')
def translate_buffer(logger, buff, base_address = None, mmapable_sections = False, writable_sections = False, filename = None, defines = None, includes = None, verify_disassemble = False):
    """Translate assembly source into sections of data slots and instructions.

    The translation runs in three passes. Pass #1 reads the source line by
    line, expands definitions and macros, evaluates conditionals and records
    data slots and instructions into their sections. Pass #2 assigns addresses,
    creates symbol table entries and relocation slots. Pass #3 builds the final
    sections and marks global symbols.

    :param logger: logger; its ``debug`` method is used for all messages, and
      the logger is passed to the line buffer and instruction helpers.
    :param buff: source text; it is split on newlines.
    :param base_address: base address of the first section, ``0`` when unset.
    :param mmapable_sections: when true, the predefined sections are flagged
      as mmapable and section bases are aligned to ``mmap.PAGESIZE`` instead
      of ``PAGE_SIZE``.
    :param writable_sections: when true, ``.text``, ``.rodata``, ``.data`` and
      ``.bss`` are flagged as writable.
    :param filename: name handed to the line buffer, ``<unknown>`` when unset.
    :param defines: list of ``NAME`` or ``NAME=VALUE`` strings. Names drive
      ``.ifdef``/``.ifndef``; names with a value are also substituted for
      ``$NAME`` in the source.
    :param includes: list of directories searched by ``.include``; the current
      working directory is searched first. The passed list is not modified.
    :param verify_disassemble: when true, every emitted instruction is
      disassembled and compared with its source line.
    :returns: ordered dictionary mapping section names to final sections.
    :raises AssemblerError: (or one of the more specific errors produced by
      ``Buffer.get_error``) when the source cannot be translated.
    """
    DEBUG = logger.debug

    base_address = base_address or 0
    filename = filename or '<unknown>'

    # Build a new list instead of inserting into the one owned by the caller.
    includes = [os.getcwd()] + list(includes or [])

    # Split on the first '=' only, so values may contain '=' themselves.
    parsed_defines = {}
    for definition in defines or []:
        d_name, d_separator, d_value = definition.partition('=')
        parsed_defines[d_name] = d_value if d_separator else None
    defines = parsed_defines

    DEBUG('translate_buffer: base_addres=%s, mmapable_sections=%s, writable_sections=%s, filename=%s, defines=%s, includes=%s, verify_disassemble=%s', UINT32_FMT(base_address), mmapable_sections, writable_sections, filename, defines, includes, verify_disassemble)

    buff = Buffer(logger, filename, buff.split('\n'))

    sections_pass1 = collections.OrderedDict([
        ('.text', TextSection('.text')),
        ('.rodata', RODataSection('.rodata')),
        ('.data', DataSection('.data')),
        ('.bss', BssSection('.bss')),
        ('.symtab', SymbolsSection('.symtab')),
        ('.reloc', RelocSection('.reloc'))
    ])

    if mmapable_sections:
        for section in itervalues(sections_pass1):
            section.flags.mmapable = True

    if writable_sections:
        for section in itervalues(sections_pass1):
            if section.name in ('.text', '.rodata', '.data', '.bss'):
                section.flags.writable = True

    DEBUG('Pass #1')

    def __apply_defs(line):
        """Substitute all known ``$name`` definitions in ``line``."""
        orig_line = line

        for def_pattern, def_value in iteritems(defs):
            line = def_pattern.sub(def_value, line)

        if orig_line != line:
            DEBUG(msg_prefix + 'variables replaced: line="%s"', line)

        return line

    def __apply_macros(line):
        """Push the body of the first macro matching ``line`` back to the buffer.

        Returns ``True`` when a macro was expanded, ``False`` otherwise.
        """
        for m_pattern, m_desc in iteritems(macros):
            matches = m_pattern.match(line)
            if not matches:
                continue

            DEBUG(msg_prefix + 'replacing macro: name=%s', m_desc['name'])

            if m_desc['params']:
                matches = matches.groupdict()

                # Map "#param" patterns to the arguments captured by the macro pattern.
                replace_map = {}
                for i, param in enumerate(m_desc['params']):
                    replace_map[re.compile(r'#{}'.format(param))] = matches['arg{}'.format(i)]

                DEBUG(msg_prefix + 'macro args: %s', ', '.join(['{} => {}'.format(pattern.pattern, repl) for pattern, repl in iteritems(replace_map)]))

                body = []
                for body_line in m_desc['body']:
                    for pattern, repl in iteritems(replace_map):
                        body_line = pattern.sub(repl, body_line)
                    body.append(body_line)

                buff.put_buffer(body)

            else:
                buff.put_buffer(m_desc['body'])

            return True

        return False

    def __set_var_location(var, location = None):
        """Set ``var.location`` unless it is already set; default is the current buffer location."""
        if var.location is None:
            if location is None:
                location = buff.location.copy()

            var.location = location

    def __set_var_label(var):
        """Name ``var`` after the pending label; more than one pending label is an error."""
        if len(labels) > 1:
            raise buff.get_error(TooManyLabelsError, 'Too many labels', column = 0)

        var.name = labels[0] if labels else None

    def __parse_integer(var, matches, max_value):
        """Store the integer operand of a directive into ``var``.

        The operand may be a decimal or hexadecimal literal, a name set by
        ``.set``, or a ``&label`` reference. ``max_value`` is only logged.
        """
        __set_var_location(var)

        groupdict = matches.groupdict()
        integer = groupdict.get('integer')
        DEBUG('__parse_integer: var=%s, max=%s, matches=%s', var, max_value, groupdict)

        if integer is None or not integer:
            raise buff.get_error(IncompleteDirectiveError, 'directive without a value specification', column = matches.end(1))

        integer_start = matches.start(2)

        matches = RE_INTEGER.match(integer)
        if matches is None:
            raise buff.get_error(IncompleteDirectiveError, 'directive without a meaningful value', column = integer_start)

        groupdict = matches.groupdict()
        DEBUG('__parse_integer: matches=%s', groupdict)

        v_value = groupdict.get('value_dec')
        if v_value:
            var.value = int(v_value)
            DEBUG('__parse_integer: var=%s', var)
            return

        v_value = groupdict.get('value_hex')
        if v_value:
            var.value = int(v_value, base = 16)
            DEBUG('__parse_integer: var=%s', var)
            return

        v_value = groupdict.get('value_var')
        if v_value:
            if v_value not in variables:
                raise buff.get_error(IncompleteDirectiveError, 'unknown variable named "%s"' % v_value, column = integer_start)

            variable = variables[v_value]
            DEBUG('__parse_integer: variable: variable=%s', variable)

            # Integer variables are stored directly, anything else becomes a reference.
            if isinstance(variable, integer_types):
                var.value = variable
            else:
                var.refers_to = Reference(label = variable)

            DEBUG('__parse_integer: var=%s', var)
            return

        v_value = groupdict.get('value_label')
        if v_value:
            var.refers_to = Reference(label = v_value)
            DEBUG('__parse_integer: var=%s', var)
            return

        raise buff.get_error(IncompleteDirectiveError, 'directive without a meaningful value', column = integer_start)

    def __parse_string(var, matches):
        """Store the quoted string operand of a directive into ``var``."""
        __set_var_location(var)

        groupdict = matches.groupdict()
        string = groupdict.get('string')
        DEBUG('__parse_string: var=%s, matches=%s', var, groupdict)

        if string is None or not string:
            raise buff.get_error(IncompleteDirectiveError, 'directive without a value specification', column = matches.end(1))

        string_start = matches.start(2)

        matches = RE_STR.match(string)
        if matches is None:
            raise buff.get_error(IncompleteDirectiveError, 'directive without a meaningful value', column = string_start)

        groupdict = matches.groupdict()
        v_value = groupdict.get('string')
        if not v_value:
            raise buff.get_error(IncompleteDirectiveError, 'directive without a meaningful value', column = string_start)

        v_value = __apply_defs(v_value)

        DEBUG('Pre-decode: (%s) %s', type(v_value), ', '.join([str(ord(c)) for c in v_value]))
        var.value = decode_string(v_value)
        DEBUG('Post-decode: (%s) %s', type(var.value), ', '.join([str(ord(c)) for c in var.value]))

    def __parse_space(var, matches):
        """Store the size operand of a ``.space`` directive into ``var``."""
        __set_var_location(var)

        matches = matches.groupdict()
        if 'size' not in matches:
            raise buff.get_error(IncompleteDirectiveError, '.size directive without a size')

        var.size = int(matches['size'])

    def __handle_symbol_variable(v_name, v_type):
        """Process a ``.type`` directive and the directives describing its variable.

        Following lines are consumed until the variable gets a value or
        a reference, or until a line not belonging to it is found; such line
        is returned to the buffer.
        """
        slot_classes = {
            'char': CharSlot,
            'byte': ByteSlot,
            'short': ShortSlot,
            'int': IntSlot,
            'ascii': AsciiSlot,
            'string': StringSlot,
            'space': SpaceSlot
        }

        var = slot_classes[v_type]()
        var.name = Label(v_name, curr_section, buff.location.copy())
        __set_var_location(var)

        while buff.has_lines() and var.value is None and var.refers_to is None:
            line = buff.get_line()

            if line is None:
                var.close()
                data_section.content.append(var)
                return

            if RE_COMMENT.match(line):
                continue

            msg_prefix = 'pass #1: %s: ' % str(buff.location)

            line = __apply_defs(line)

            if not current_macro and __apply_macros(line):
                DEBUG(msg_prefix + 'macro replaced, get fresh line')
                continue

            # Another .type directive starts a new variable.
            if RE_TYPE.match(line):
                buff.put_line(line)
                break

            matches = RE_SIZE.match(line)
            if matches:
                matches = matches.groupdict()

                if 'size' not in matches:
                    raise buff.get_error(IncompleteDirectiveError, '.size directive without a size')

                var.size = int(matches['size'])
                continue

            matches = RE_SHORT.match(line)
            if matches:
                __parse_integer(var, matches, 0xFFFF)
                continue

            matches = RE_INT.match(line)
            if matches:
                __parse_integer(var, matches, 0xFFFFFFFF)
                continue

            matches = RE_ASCII.match(line)
            if matches:
                __parse_string(var, matches)
                continue

            matches = RE_STRING.match(line)
            if matches:
                __parse_string(var, matches)
                continue

            matches = RE_SPACE.match(line)
            if matches:
                __parse_space(var, matches)
                continue

            matches = RE_BYTE.match(line)
            if matches:
                __parse_integer(var, matches, 0xFF)
                continue

            # Unknown line - it does not belong to this variable.
            buff.put_line(line)
            break

        var.close()
        data_section.content.append(var)

    labels = []
    variables = {}
    instruction_set = cpu.instructions.DuckyInstructionSet

    # Definitions with a value are substituted for "$name" in the source.
    defs = collections.OrderedDict()
    for d_name, d_value in iteritems(defines):
        if d_value is None:
            continue

        defs[re.compile(r'\${}'.format(d_name))] = d_value.strip()

    macros = collections.OrderedDict()
    current_macro = None

    DEBUG('Pass #1: text section is .text')
    DEBUG('Pass #1: data section is .data')

    text_section = sections_pass1['.text']
    data_section = sections_pass1['.data']
    curr_section = text_section

    global_symbols = []

    # Stack of open conditionals, each entry is (is_ifdef, variable name).
    ifs = []

    def __fast_forward():
        """Skip lines up to the ``.else`` or ``.endif`` closing the current conditional.

        The closing line is returned to the buffer, to be handled by the
        main loop.
        """
        DEBUG(msg_prefix + 'fast forwarding')

        depth = 1

        while buff.has_lines():
            line = buff.get_line()

            if line is None:
                return

            if not line.strip():
                continue

            if RE_IFDEF.match(line) or RE_IFNDEF.match(line):
                depth += 1
                continue

            if RE_ENDIF.match(line):
                depth -= 1

                if depth == 0:
                    buff.put_line(line)
                    return

                continue

            # Only the .else of the conditional being skipped ends the skipping;
            # .else of a nested conditional does not change the nesting depth.
            if depth == 1 and RE_ELSE.match(line):
                buff.put_line(line)
                return

    while buff.has_lines():
        line = buff.get_line()

        if line is None:
            break

        if not line.strip():
            continue

        msg_prefix = 'pass #1: %s: ' % str(buff.location)

        line = __apply_defs(line)

        if not current_macro and __apply_macros(line):
            DEBUG(msg_prefix + 'macro replaced, get fresh line')
            continue

        if RE_COMMENT.match(line):
            continue

        matches = RE_IFDEF.match(line)
        if matches:
            var = matches.groupdict()['var']

            DEBUG(msg_prefix + 'ifdef %s', var)
            ifs.append((True, var))

            if var in defines:
                DEBUG(msg_prefix + 'defined, continue processing')
                continue

            __fast_forward()
            continue

        matches = RE_IFNDEF.match(line)
        if matches:
            var = matches.groupdict()['var']

            DEBUG(msg_prefix + 'ifndef %s', var)
            ifs.append((False, var))

            if var not in defines:
                DEBUG(msg_prefix + 'not defined, continue processing')
                continue

            __fast_forward()
            continue

        if RE_ENDIF.match(line):
            if not ifs:
                raise buff.get_error(AssemblerError, '.endif without matching .ifdef or .ifndef')

            DEBUG(msg_prefix + 'removing the last conditional from stack: %s', ifs[-1])
            ifs.pop()
            continue

        if RE_ELSE.match(line):
            if not ifs:
                raise buff.get_error(AssemblerError, '.else without matching .ifdef or .ifndef')

            defined, var = ifs.pop()

            DEBUG(msg_prefix + 'previous block was "%s %s"', 'ifdef' if defined is True else 'ifndef', var)

            ifs.append((not defined, var))

            # The previous block was processed when its condition held - for both
            # .ifdef and .ifndef - and in that case the .else block is skipped.
            if defined == (var in defines):
                __fast_forward()
                continue

            DEBUG(msg_prefix + 'continue processing')
            continue

        matches = RE_INCLUDE.match(line)
        if matches:
            groupdict = matches.groupdict()

            if 'file' not in groupdict:
                raise buff.get_error(IncompleteDirectiveError, '.include directive without path', column = 0)

            DEBUG(msg_prefix + 'include: file=%s', groupdict['file'])

            replace = None
            include_path = None

            # The first readable file found in the include directories wins.
            for d in includes:
                include_path = os.path.join(d, groupdict['file'])

                DEBUG(msg_prefix + ' checking file %s', include_path)

                try:
                    with open(include_path, 'r') as f_in:
                        replace = f_in.read()

                except IOError:
                    DEBUG(' failed to read')
                    pass  # "empty body on ExceptHandler" without this, because of patching

                else:
                    DEBUG(' read as replacement')
                    break

            if replace is None:
                raise buff.get_error(UnknownFileError, groupdict['file'], column = matches.start(2))

            buff.put_buffer(replace, filename = include_path)
            continue

        matches = RE_VAR_DEF.match(line)
        if matches:
            matches = matches.groupdict()

            v_name = matches.get('var_name')
            v_body = matches.get('var_body')

            if not v_name or not v_body:
                raise buff.get_error(IncompleteDirectiveError, 'bad variable definition')

            DEBUG(msg_prefix + 'variable defined: name=%s, value=%s', v_name, v_body)

            defs[re.compile(r'\${}'.format(v_name))] = v_body.strip()
            continue

        matches = RE_MACRO_DEF.match(line)
        if matches:
            matches = matches.groupdict()

            m_name = matches.get('macro_name')
            m_params = matches.get('macro_params')

            if not m_name:
                raise buff.get_error(IncompleteDirectiveError, 'bad macro definition')

            DEBUG(msg_prefix + 'macro defined: name=%s', m_name)

            if current_macro:
                raise buff.get_error(AssemblerError, 'overlapping macro definitions')

            current_macro = {
                'name': m_name,
                'pattern': None,
                'params': [p.strip() for p in m_params.strip().split(',')] if m_params else [],
                'body': []
            }

            if current_macro['params'] and len(current_macro['params'][0]):
                # One named group per parameter, arguments are separated by commas.
                arg_pattern = r'(?P<arg%i>(?:".*?")|(?:.*?))'
                arg_patterns = r',\s*'.join([arg_pattern % i for i in range(0, len(current_macro['params']))])

                current_macro['pattern'] = re.compile(r'^\s*\${}\s+{}\s*(?:[;/#].*)?$'.format(m_name, arg_patterns), re.MULTILINE)

            else:
                current_macro['pattern'] = re.compile(r'\s*\${}'.format(m_name))

            continue

        if RE_MACRO_END.match(line):
            if not current_macro:
                raise buff.get_error(AssemblerError, 'closing non-existing macro')

            macros[current_macro['pattern']] = current_macro
            DEBUG(msg_prefix + 'macro definition closed: name=%s', current_macro['name'])
            current_macro = None
            continue

        # Inside of a macro definition all remaining lines are part of its body.
        if current_macro:
            current_macro['body'].append(line)
            continue

        matches = RE_SECTION.match(line)
        if matches:
            matches = matches.groupdict()

            if 'name' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.section directive without section name')

            s_name = matches['name']

            if s_name not in sections_pass1:
                section_flags = SectionFlags.from_string(matches.get('flags') or '')
                section_type = SectionTypes.TEXT if section_flags.executable is True else SectionTypes.DATA

                sections_pass1[s_name] = Section(s_name, section_type, section_flags)
                DEBUG(msg_prefix + 'section %s created', s_name)

            curr_section = sections_pass1[s_name]

            if curr_section.type == SectionTypes.TEXT:
                text_section = curr_section
                DEBUG(msg_prefix + 'text section changed to %s', s_name)

            else:
                data_section = curr_section
                DEBUG(msg_prefix + 'data section changed to %s', s_name)

            continue

        matches = RE_DATA.match(line)
        if matches:
            matches = matches.groupdict()

            curr_section = data_section = sections_pass1[matches.get('name') or '.data']

            DEBUG(msg_prefix + 'data section is %s', data_section.name)
            continue

        matches = RE_TEXT.match(line)
        if matches:
            matches = matches.groupdict()

            curr_section = text_section = sections_pass1[matches.get('name') or '.text']

            DEBUG(msg_prefix + 'text section is %s', text_section.name)
            continue

        matches = RE_TYPE.match(line)
        if matches:
            matches = matches.groupdict()

            if 'type' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.type directive without a type')

            if 'name' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.type directive without a name')

            __handle_symbol_variable(matches['name'], matches['type'])
            continue

        # Data directives outside of a .type block. "labels" is replaced by a new
        # list (not cleared in place) because recorded entries may keep the old one.
        matches = RE_BYTE.match(line)
        if matches:
            var = ByteSlot()
            __parse_integer(var, matches, 0xFF)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record byte value: name=%s, value=%s', var.name, var.value)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_SHORT.match(line)
        if matches:
            var = ShortSlot()
            __parse_integer(var, matches, 0xFFFF)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record short value: name=%s, value=%s', var.name, var.value)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_INT.match(line)
        if matches:
            var = IntSlot()
            __parse_integer(var, matches, 0xFFFFFFFF)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record int value: name=%s, value=%s, refers_to=%s', var.name, var.value, var.refers_to)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_ASCII.match(line)
        if matches:
            var = AsciiSlot()
            __parse_string(var, matches)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record ascii value: name=%s, value=%s', var.name, var.value)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_STRING.match(line)
        if matches:
            var = StringSlot()
            __parse_string(var, matches)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record string value: name=%s, value=%s', var.name, var.value)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_SPACE.match(line)
        if matches:
            var = SpaceSlot()
            __parse_space(var, matches)
            __set_var_label(var)
            var.close()

            DEBUG(msg_prefix + 'record space: name=%s, value=%s', var.name, var.size)
            data_section.content.append(var)

            labels = []
            continue

        matches = RE_ALIGN.match(line)
        if matches:
            matches = matches.groupdict()

            if 'boundary' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.align directive without boundary')

            var = AlignSlot(int(matches['boundary']))

            DEBUG(msg_prefix + 'align: boundary=%s', var.boundary)
            data_section.content.append(var)
            continue

        matches = RE_GLOBAL.match(line)
        if matches:
            matches = matches.groupdict()

            if 'name' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.global directive without variable')

            global_symbols.append(matches['name'])
            continue

        matches = RE_SET.match(line)
        if matches:
            matches = matches.groupdict()

            if 'name' not in matches:
                raise buff.get_error(IncompleteDirectiveError, '.set directive without variable')

            name = matches['name']

            if matches.get('current'):
                # "." stands for the current position in the current section.
                value = (curr_section.name, curr_section.ptr)

            elif matches.get('value_dec'):
                value = int(matches['value_dec'])

            elif matches.get('value_hex'):
                value = int(matches['value_hex'], base = 16)

            elif matches.get('value_label'):
                value = matches['value_label']

            else:
                raise buff.get_error(IncompleteDirectiveError, '.set directive with unknown value')

            DEBUG(msg_prefix + 'set variable: name=%s, value=%s', name, value)

            variables[name] = value
            continue

        matches = RE_LABEL.match(line)
        if matches:
            DEBUG('matches: %s', matches.groupdict())

            loc = buff.location.copy()
            loc.column = matches.start(2)

            label = Label(matches.groupdict()['label'], curr_section, loc)
            labels.append(label)

            DEBUG(msg_prefix + 'record label: name=%s', label.name)
            continue

        # Nothing else matched, the line has to be an instruction.
        DEBUG(msg_prefix + 'line: %s', line)

        # Find instruction descriptor
        DEBUG(msg_prefix + 'instr set: %s', instruction_set)

        for desc in instruction_set.instructions:
            if desc.pattern.match(line):
                break

        else:
            raise buff.get_error(UnknownPatternError, line, column = 0)

        emited_inst = desc.emit_instruction(logger, buff, line)
        emited_inst.desc = desc

        # Each entry is (labels or None, instruction).
        text_section.content.append((labels if labels else None, emited_inst))
        labels = []

        emited_inst_disassemble = emited_inst.desc.instruction_set.disassemble_instruction(logger, emited_inst)

        DEBUG(msg_prefix + 'emitted instruction: %s (%s)', emited_inst_disassemble, UINT32_FMT(encoding_to_u32(emited_inst)))

        if verify_disassemble and line != emited_inst_disassemble:
            raise buff.get_error(DisassembleMismatchError, 'input="%s", emitted="%s"' % (line, emited_inst_disassemble))

        # SIS instruction switches the instruction set used for the following lines.
        if isinstance(desc, cpu.instructions.SIS):
            DEBUG(msg_prefix + 'switching istruction set: inst_set=%s', emited_inst.immediate)

            instruction_set = cpu.instructions.get_instruction_set(emited_inst.immediate)

    for s_name, section in iteritems(sections_pass1):
        DEBUG('pass #1: section %s', s_name)

        if section.type == SectionTypes.TEXT:
            for labeled, inst in section.content:
                DEBUG('pass #1: inst=%s, labeled=%s', inst, labeled)

        else:
            for var in section.content:
                DEBUG('pass #1: %s', var)

    DEBUG('Pass #2')

    sections_pass2 = collections.OrderedDict()
    references = {}
    base_ptr = base_address

    for s_name, p1_section in iteritems(sections_pass1):
        sections_pass2[s_name] = Section(s_name, p1_section.type, p1_section.flags)

    symtab = sections_pass2['.symtab']
    reloctab = sections_pass2['.reloc']

    for s_name, section in iteritems(sections_pass2):
        p1_section = sections_pass1[s_name]

        section.base = base_ptr
        section.ptr = base_ptr

        DEBUG('pass #2: section %s - base=%s', section.name, UINT32_FMT(section.base))

        # Symbol and relocation tables are filled while processing other sections.
        if section.type == SectionTypes.SYMBOLS or section.type == SectionTypes.RELOC:
            continue

        if section.type == SectionTypes.DATA:
            for var in p1_section.content:
                ptr_prefix = 'pass #2: ' + UINT32_FMT(section.ptr) + ': '

                DEBUG(ptr_prefix + str(var))

                if isinstance(var, AlignSlot):
                    # Alignment is implemented by inserting a padding space slot.
                    aligned_ptr = align(var.boundary, section.ptr)
                    padding_bytes = aligned_ptr - section.ptr

                    if padding_bytes == 0:
                        DEBUG(ptr_prefix + ' align %s to multiple of %s: aligned already, ignore', UINT32_FMT(section.ptr), var.boundary)
                        continue

                    padding = SpaceSlot()
                    padding.size = padding_bytes

                    DEBUG(ptr_prefix + ' align %s to multiple of %s: %s padding bytes => %s', UINT32_FMT(section.ptr), var.boundary, padding.size, UINT32_FMT(section.ptr + padding.size))
                    DEBUG(ptr_prefix + ' %s', padding)

                    section.content.append(padding)
                    section.ptr += padding.size

                    DEBUG(ptr_prefix + ' padding stored')
                    continue

                # Named variables get a symbol table entry and can be referenced.
                if var.name:
                    var.section = section
                    var.section_ptr = section.ptr

                    references['&' + var.name.name] = var
                    symtab.content.append(var)

                if var.refers_to is not None:
                    reference = var.refers_to

                    DEBUG(ptr_prefix + ' refers to: %s', reference)

                    if reference.label is None:
                        raise Exception('reference without a label: %s' % reference)

                    # Labels are stored with leading "&", relocations use the bare name.
                    reloc = RelocSlot(reference.label[1:], patch_section = section, patch_address = section.ptr, patch_offset = 0, patch_size = 16, patch_add = reference.add)
                    DEBUG(ptr_prefix + ' reloc slot created: %s', reloc)

                    reloctab.content.append(reloc)
                    var.refers_to = None

                # NOTE(review): CharSlot is not handled by any branch below, such
                # variables get a symbol but no content - confirm this is intended.
                if isinstance(var, (IntSlot, ShortSlot, ByteSlot)):
                    if var.value is not None:
                        section.content += var.value
                        DEBUG(ptr_prefix + ' value stored')

                    else:
                        section.content.append(var)
                        DEBUG(ptr_prefix + ' value missing - reserve space, fix in next pass')

                    if isinstance(var, IntSlot) and var.size != 4:
                        raise Exception('int slot with unexpected size: %s' % var)

                    section.ptr += var.size

                # Exact type checks are kept on purpose, subclasses are not matched.
                elif type(var) is AsciiSlot or type(var) is StringSlot or isinstance(var, BytesSlot):
                    section.content += var.value
                    section.ptr += var.size

                    DEBUG(ptr_prefix + ' value stored')

                elif type(var) is SpaceSlot:
                    section.content.append(var)
                    section.ptr += var.size

                    DEBUG(ptr_prefix + ' value stored')

        if section.type == SectionTypes.TEXT:
            for labeled, inst in p1_section.content:
                ptr_prefix = 'pass #2: ' + UINT32_FMT(section.ptr) + ': '

                DEBUG(ptr_prefix + '%s (%s)', inst.desc.instruction_set.disassemble_instruction(logger, inst), UINT32_FMT(encoding_to_u32(inst)))

                inst.address = section.ptr

                # Every label attached to the instruction becomes a function symbol.
                for label in labeled or []:
                    var = FunctionSlot()
                    var.name = label
                    var.section = section
                    var.section_ptr = section.ptr
                    __set_var_location(var, location = label.location)
                    var.close()

                    symtab.content.append(var)
                    references['&' + label.name] = var

                    DEBUG(ptr_prefix + 'label entry "%s" created', label)

                if inst.desc.operands and ('i' in inst.desc.operands or 'j' in inst.desc.operands) and hasattr(inst, 'refers_to') and inst.refers_to is not None:
                    DEBUG(ptr_prefix + 'refers to: label=%s, relative=%s, inst_aligned=%s', inst.refers_to, inst.desc.relative_address, inst.desc.inst_aligned)
                    DEBUG(ptr_prefix + 'refers to: %s', inst.refers_to)

                    reloc = RelocSlot(inst.refers_to.label[1:], flags = RelocFlags.create(relative = inst.desc.relative_address, inst_aligned = inst.desc.inst_aligned), patch_section = section, patch_address = section.ptr)
                    inst.fill_reloc_slot(logger, inst, reloc)

                    sections_pass2['.reloc'].content.append(reloc)

                    if inst.refers_to in references:
                        reloc.patch_section = references[inst.refers_to].section

                    DEBUG(ptr_prefix + 'reloc slot created: %s', reloc)

                    inst.refers_to = None

                section.content.append(inst)
                section.ptr += 4

        # The next section starts on the next page boundary.
        base_ptr = align_to_next_mmap(section.ptr) if mmapable_sections else align_to_next_page(section.ptr)

    DEBUG('Pass #3')

    # Ordered, like in the previous passes, to keep the order of sections stable.
    sections_pass3 = collections.OrderedDict()

    for s_name, p2_section in iteritems(sections_pass2):
        section = Section(s_name, p2_section.type, p2_section.flags)
        sections_pass3[s_name] = section

        section.base = p2_section.base
        section.ptr = section.base

    symtab = sections_pass3['.symtab']
    reloctab = sections_pass3['.reloc']

    for s_name, section in iteritems(sections_pass3):
        DEBUG('pass #3: section %s', section.name)

        p2_section = sections_pass2[s_name]

        if section.type == SectionTypes.SYMBOLS:
            symtab = section

        elif section.type == SectionTypes.RELOC:
            reloctab = section

        for item in p2_section.content:
            ptr_prefix = 'pass #3: ' + UINT32_FMT(section.ptr) + ': '

            if section.type == SectionTypes.SYMBOLS:
                # Symbol names are either Label instances or plain values.
                if (type(item.name) is Label and item.name.name in global_symbols) or item.name in global_symbols:
                    item.flags.globally_visible = True

            elif type(item) is IntSlot:
                if item.refers_to:
                    reference, item.refers_to = item.refers_to, None

                    DEBUG(ptr_prefix + 'refers to: %s', reference)

                    # The referenced symbol is looked up by its label; item.refers_to
                    # has been reset above and cannot serve as the key anymore.
                    reloc = RelocSlot(reference.label[1:], patch_section = references[reference.label].section, patch_address = section.ptr, patch_offset = 0, patch_size = 16, patch_add = reference.add)
                    DEBUG(ptr_prefix + 'reloc slot created: %s', reloc)

                    reloctab.content.append(reloc)

                # Int slots get here only when pass #2 had no value for them; a
                # placeholder value is stored instead.
                # NOTE(review): placement of the placeholder outside of the
                # condition above should be confirmed against the project history.
                item.value = 0x79797979
                item.close()

                item = item.value

            elif hasattr(item, 'refers_to') and item.refers_to:
                reference, item.refers_to = item.refers_to, None

                DEBUG(ptr_prefix + 'refers to: label=%s, relative=%s, inst_aligned=%s', reference, item.desc.relative_address, item.desc.inst_aligned)

                reloc = RelocSlot(reference.label[1:], flags = RelocFlags.create(relative = item.desc.relative_address, inst_aligned = item.desc.inst_aligned), patch_section = section, patch_address = section.ptr, patch_add = reference.add)
                item.fill_reloc_slot(logger, item, reloc)
                DEBUG(ptr_prefix + 'reloc slot created: %s', reloc)

                reloctab.content.append(reloc)

            DEBUG(ptr_prefix + str(item))

            # A closed slot value may be a list of items, store them one by one.
            if not isinstance(item, list):
                item = [item]

            for i in item:
                section.content.append(i)
                section.ptr += sizeof(i)

        DEBUG('pass #3: section %s finished: %s', section.name, section)

    DEBUG('Bytecode sections:')

    for s_name, section in iteritems(sections_pass3):
        DEBUG(str(section))

    DEBUG('Bytecode translation completed')

    return sections_pass3