diff --git a/arctic.nimble b/arctic.nimble
new file mode 100644
index 0000000..15424d7
--- /dev/null
+++ b/arctic.nimble
@@ -0,0 +1,16 @@
+# Package
+
+version       = "0.1.0"
+author        = "Louis Burke"
+description   = "ARCTIC in Nim"
+license       = "Apache-2.0"
+srcDir        = "src"
+installExt    = @["nim"]
+bin           = @["arctic"]
+
+
+# Dependencies
+
+requires "nim >= 2.0.0"
+requires "bio"
+requires "itertools"
diff --git a/src/arctic.nim b/src/arctic.nim
new file mode 100644
index 0000000..92234f0
--- /dev/null
+++ b/src/arctic.nim
@@ -0,0 +1,425 @@
+import std/[critbits, strformat, strutils, strmisc, re]
+import bio
+import itertools
+
+type
+  ArcticTypeKind* = enum
+    BYTE, SHORT, INT, WORD, DOUBLE
+
+  # One untyped stack/register cell: all fields overlay the same memory.
+  # NOTE(review): `f` is a tuple holding a `string`, i.e. GC-managed memory.
+  # The Nim manual says a `{.union.}` object must not use GC'ed memory and
+  # that this is not checked by the compiler. TODO: confirm, and store
+  # symbols out of line (or use an object variant) before relying on `f`.
+  ArcticType* {.union.} = object
+    b: int8
+    s: int16
+    i: int32
+    w: int64
+    d: float64
+    p: pointer # only used for dynamically allocated memory
+    f: ArcticSymbol # also used for pointers to section memory
+
+  ArcticVariableIndex* = enum
+    VARIABLE_A, VARIABLE_B, VARIABLE_C,
+    VARIABLE_I, VARIABLE_N,
+    VARIABLE_X, VARIABLE_Y, VARIABLE_Z
+
+  # A position inside a named section (operation index or byte offset).
+  ArcticSymbol* = tuple
+    section: string
+    index: int
+
+  ArcticImmediateKind* = enum
+    PLAIN, VARIABLE, INTEGER, NUMBER, SYMBOL
+
+  ArcticImmediate* = object
+    case kind: ArcticImmediateKind
+    of PLAIN: nil
+    of VARIABLE: v: ArcticVariableIndex
+    of INTEGER: i: int64
+    of NUMBER: n: float64
+    of SYMBOL: s: string # key into `ArcticState.symbols`, without the `$`
+
+  ArcticOperation* = object
+    code: char
+    immediate: ArcticImmediate
+
+  ArcticCode* = seq[ArcticOperation]
+  ArcticMemory* = seq[uint8]
+
+  ArcticSection* = object
+    case iscode: bool
+    of true: code: ArcticCode
+    of false: data: ArcticMemory
+  ArcticStack* = seq[ArcticType]
+
+  ArcticState* = object
+    symbols: CritBitTree[ArcticSymbol] # as name or section _name
+    sections: CritBitTree[ArcticSection]
+    stack: ArcticStack
+    pc: ArcticSymbol
+    registers: array[ArcticVariableIndex, ArcticType]
+
+  ArcticStepResult* = enum
+    CONTINUE, BREAKPOINT, ERROR
+
+  ArcticBuiltin* = proc (state: var ArcticState): ArcticStepResult
+
+const
+  # Operations that are followed by an immediate operand.
+  ImmediateOps: set[char] = {
+    '0', 'E', 'F', 'G', 'H', 'L', 'M', 'P', 'Q', 'd', 'o', 'u',
+    '<', '{', '=', '}', '>', ',', ';', '[', ']'}
+  # Operations that stand on their own.
+  PlainOps: set[char] = {
+    '1', '2', '3', '4', '5', '6', '7', '8', '9',
+    'A', 'B', 'C', 'D', 'I', 'J', 'K', 'N', 'O', 'R', 'S', 'U', 'V', 'W',
+    'X', 'Y', 'Z',
+    'a', 'b', 'c', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p',
+    'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z',
+    '_', '.', '+', '-', '*', '/', '%', '\\', '|', '&', '^', '!', '?',
+    '(', ')', '`'}
+
+proc `$`*(variable: ArcticVariableIndex): string =
+  ## Returns the one-letter name of a register.
+  case variable:
+  of VARIABLE_A: return "A"
+  of VARIABLE_B: return "B"
+  of VARIABLE_C: return "C"
+  of VARIABLE_I: return "I"
+  of VARIABLE_N: return "N"
+  of VARIABLE_X: return "X"
+  of VARIABLE_Y: return "Y"
+  of VARIABLE_Z: return "Z"
+
+proc `$`*(op: ArcticOperation): string =
+  ## Renders an operation as its code, its immediate (if any) and a space.
+  result.add op.code
+
+  case op.immediate.kind:
+  of PLAIN: discard
+  of VARIABLE: result.add $op.immediate.v
+  of INTEGER: result.add $op.immediate.i
+  of NUMBER: result.add $op.immediate.n
+  of SYMBOL: result.add "$" & op.immediate.s
+
+  result.add " "
+
+proc `$`*(state: ArcticState): string =
+  ## Dumps the registers, the stack and every section. Labels are shown as
+  ## `:name` before the element they point at, and `>` marks the operation
+  ## the program counter refers to.
+  result &= "Registers:\n"
+  for idx in ArcticVariableIndex:
+    result &= &" {idx} = {state.registers[idx]}\n"
+
+  result &= "Stack:\n"
+
+  for item in state.stack:
+    result &= &" {item}\n"
+
+  for (name, section) in state.sections.pairs:
+    result &= &"\n\"{name}\": "
+    if section.iscode:
+      result &= "code \n"
+      for (i, m) in section.code.pairs:
+        for (label, location) in state.symbols.pairs:
+          if location.section == name and location.index == i:
+            result &= &":{label} "
+        if state.pc.section == name and state.pc.index == i:
+          result &= ">"
+        result &= $m
+    else:
+      result &= "\n"
+      for (i, m) in section.data.pairs:
+        for (label, location) in state.symbols.pairs:
+          if location.section == name and location.index == i:
+            result &= &":{label} "
+        result &= &"{m} "
+
+proc op(code: char, immediate: ArcticImmediate = ArcticImmediate(kind: PLAIN)): ArcticOperation =
+  ## Builds an operation; without `immediate` it carries none (`PLAIN`).
+  ArcticOperation(code: code, immediate: immediate)
+
+proc add(section: var ArcticSection, code: char) =
+  ## Appends `code` as a plain operation (code section) or as a raw byte
+  ## (data section).
+  if section.iscode:
+    section.code.add code.op
+  else:
+    section.data.add code.uint8
+
+proc tovar(code: char): ArcticVariableIndex =
+  ## Maps a register letter (either case) to its index.
+  ## Raises `ValueError` for any other character.
+  case code.toLowerAscii:
+  of 'a': VARIABLE_A
+  of 'b': VARIABLE_B
+  of 'c': VARIABLE_C
+  of 'i': VARIABLE_I
+  of 'n': VARIABLE_N
+  of 'x': VARIABLE_X
+  of 'y': VARIABLE_Y
+  of 'z': VARIABLE_Z
+  else: raise newException(ValueError, "not a variable name: " & code)
+
+proc add(section: var ArcticSection, code: char, immstr: string, secname: string) =
+  ## Appends `code` with the immediate described by `immstr` to a code
+  ## section; data sections are left untouched.
+  ##
+  ## `immstr` is classified as, in order: empty (no immediate), decimal
+  ## integer, variable letter, `$symbol`, or floating point number (raising
+  ## `ValueError` if it does not parse). Symbols are stored without the `$`
+  ## sigil, using the same keys as labels: `name` for global ones and
+  ## `secname & " " & name` for local ones (empty or starting with `_`).
+  if not section.iscode:
+    return
+
+  if immstr.len == 0:
+    # An immediate op directly followed by its terminator has no operand.
+    section.code.add code.op
+  elif immstr.match(re"^\d+$"):
+    section.code.add code.op(
+      ArcticImmediate(kind: INTEGER, i: immstr.parseBiggestInt))
+  elif immstr.match(re"^[AaBbCcIiNnXxYyZz]$"):
+    section.code.add code.op(
+      ArcticImmediate(kind: VARIABLE, v: immstr[0].tovar))
+  elif immstr[0] == '$':
+    let name = immstr[1..^1]
+    let key =
+      if name.len == 0 or name[0] == '_': secname & " " & name
+      else: name
+    section.code.add code.op(ArcticImmediate(kind: SYMBOL, s: key))
+  else:
+    section.code.add code.op(
+      ArcticImmediate(kind: NUMBER, n: immstr.parseFloat))
+
+proc len(section: ArcticSection): int =
+  ## Number of operations (code section) or bytes (data section).
+  if section.iscode: section.code.len
+  else: section.data.len
+
+func grabnum(data: string): (string, string) =
+  ## Splits `data` into the numeric literal at its start -- an optional
+  ## leading `-` followed by digits and dots -- and the rest of `data`.
+  ## The literal is not validated here; parsing it may still fail.
+  var i = 0
+  if i < data.len and data[i] == '-':
+    inc i
+  while i < data.len and data[i] in {'0'..'9', '.'}:
+    inc i
+
+  result = (data[0 ..< i], data[i .. ^1])
+
+proc insert(section: var ArcticSection, data: string) =
+  ## Appends the constants described by `data` to a data section (code
+  ## sections are left untouched). `data` is a sequence of directives:
+  ##
+  ## * `b`/`B`, `s`/`S`, `i`/`I`, `w`/`W` + number: 8, 16, 32 and 64 bit
+  ##   integers, matching the field widths of `ArcticType`
+  ## * `f`/`F` + number: 64 bit float
+  ## * `x` + hex digit pairs + `x`: raw bytes
+  ## * `"..."`: the bytes of a string (escape sequences are not decoded yet)
+  ##
+  ## Lower case directives serialize little endian, upper case big endian.
+  ## Any other character is skipped. A directive whose number is missing or
+  ## malformed raises `ValueError`.
+  if data.len == 0 or section.iscode:
+    return
+
+  template insertInt(itype, utype, endianness) =
+    let (text, rest) = data[1..^1].grabnum
+
+    if text.startsWith('-'):
+      # Reinterpret the two's complement bits so they can be serialized.
+      let val = itype(text.parseBiggestInt)
+      section.data.add cast[utype](val).serialize(endianness)
+    else:
+      let val = utype(text.parseBiggestUInt)
+      section.data.add val.serialize(endianness)
+
+    section.insert(rest)
+
+  case data[0]:
+  of 'i': insertInt(int32, uint32, littleEndian)
+  of 'I': insertInt(int32, uint32, bigEndian)
+  of 'w': insertInt(int64, uint64, littleEndian)
+  of 'W': insertInt(int64, uint64, bigEndian)
+  of 's': insertInt(int16, uint16, littleEndian)
+  of 'S': insertInt(int16, uint16, bigEndian)
+  of 'b', 'B': # bytes don't have an endianness
+    let (text, rest) = data[1..^1].grabnum
+
+    if text.startsWith('-'):
+      section.data.add cast[uint8](int8(text.parseBiggestInt))
+    else:
+      section.data.add uint8(text.parseBiggestUInt)
+
+    section.insert(rest)
+
+  of 'f':
+    let (text, rest) = data[1..^1].grabnum
+    section.data.add cast[uint64](text.parseFloat).serialize(littleEndian)
+    section.insert(rest)
+  of 'F':
+    let (text, rest) = data[1..^1].grabnum
+    section.data.add cast[uint64](text.parseFloat).serialize(bigEndian)
+    section.insert(rest)
+
+  of 'x': # hexadecimal byte constant
+    let (hex, _, rest) = data[1..^1].partition("x")
+    for pair in hex.chunked(2):
+      section.data.add uint8(parseHexInt(pair.join))
+    section.insert(rest)
+
+  of '"': # utf-8 string constant with escapes
+    let strlen = data.matchLen(re"^""([^""]|\\"")*""")
+
+    if strlen < 0:
+      return # TODO: couldn't find a string, what to do?
+
+    # `strlen` counts both quotes, so the contents are data[1 .. strlen-2].
+    for c in data[1 ..< strlen - 1]:
+      section.data.add c.uint8
+
+    section.insert(data[strlen..^1])
+
+  else:
+    section.insert(data[1..^1])
+
+func load*(code: string): ArcticState =
+  ## Parses ARCTIC source text into an initial state.
+  ##
+  ## The text is consumed one character at a time:
+  ##
+  ## * plain operations and newlines are appended to the current section
+  ## * `#` starts a comment that runs to the end of the line
+  ## * `"name"` switches to (and creates, if needed) section `name`; the
+  ##   sections `code`, `macros` and the unnamed one hold code, all others
+  ##   hold data
+  ## * `:name` followed by a space or newline defines a label at the current
+  ##   end of the section; names starting with `_` are local to the section
+  ## * `'...'` injects constants into the current section
+  ## * an immediate operation collects its operand up to the next space
+  ##
+  ## Anything still pending when the input ends is dropped.
+  var
+    section = ""
+    token = ""
+
+  result.pc = ("", 0)
+  result.sections[section] = ArcticSection(iscode: true, code: @[])
+
+  for next in code:
+    when defined(arcticDebug):
+      # Per-character trace; opt-in so it does not flood the output.
+      debugEcho "token: ", token, ", next: ", next
+
+    if token.len == 0: # initial state
+      case next:
+      of ImmediateOps, '#', ':', '\"', '\'':
+        token.add next
+      of PlainOps, '\n':
+        result.sections[section].add next
+      else:
+        discard
+    else:
+      case token[0]:
+      of '#': # comment
+        if next == '\n':
+          token = ""
+          result.sections[section].add next
+
+      of '"': # section switch
+        if next == '"' and token[^1] != '\\':
+          section = token[1..^1]
+          if section notin result.sections:
+            if section.toLowerAscii in ["code", "macros", ""]:
+              result.sections[section] = ArcticSection(iscode: true, code: @[])
+            else:
+              result.sections[section] = ArcticSection(iscode: false, data: @[])
+          token = ""
+        else:
+          token.add next
+
+      of ':': # label name
+        if next == ' ' or next == '\n':
+          # A bare `:` has no name; skip it instead of indexing past the end.
+          if token.len > 1:
+            let name = token[1..^1]
+            let idx = if name[0] == '_': section & " " & name else: name
+            result.symbols[idx] = (section: section,
+                                   index: result.sections[section].len)
+          token = ""
+        else:
+          token.add next
+
+      of '\'': # data injection
+        if next == '\'':
+          # A quote only ends the injection when it is outside a string
+          # constant and not escaped, so rescan what was collected so far.
+          var
+            quoted = false
+            escaped = false
+          for c in token:
+            if c == '"':
+              if not escaped:
+                quoted = not quoted
+              escaped = false
+            elif c == '\\':
+              escaped = not escaped # `\\` is an escaped backslash
+            else:
+              escaped = false
+
+          if quoted or escaped:
+            token.add next
+          else:
+            result.sections[section].insert token[1..^1]
+            token = ""
+        else:
+          token.add next
+
+      of ImmediateOps:
+        if next == ' ':
+          result.sections[section].add(token[0], token[1..^1], section)
+          token = ""
+        else:
+          token.add next
+
+      else:
+        discard
+
+proc step*(state: var ArcticState, builtins: CritBitTree[ArcticBuiltin]): ArcticStepResult =
+  ## Executes the operation at the program counter.
+  ##
+  ## Returns `ERROR` when the program counter does not point at an operation
+  ## of an existing code section, when the stack is too short, or when the
+  ## operation is not implemented yet. `builtins` is not consulted yet.
+  if state.pc.section notin state.sections:
+    return ERROR
+
+  let codesec = state.sections[state.pc.section]
+
+  if not codesec.iscode:
+    return ERROR
+
+  if state.pc.index < 0 or state.pc.index >= codesec.code.len:
+    return ERROR
+
+  let op = codesec.code[state.pc.index]
+  case op.code:
+  of '1': # LDAB
+    if state.stack.len == 0:
+      return ERROR
+
+    let top = state.stack[^1]
+    discard top # TODO: implement LDAB
+  else:
+    # TODO: the remaining operations are not implemented yet.
+    return ERROR
+
+
+when isMainModule:
+  let state = stdin.readAll.load
+  echo state