diff --git a/.gitignore b/.gitignore index 308a0dd..f86f5b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,16 @@ build/ /arctic htmldocs +acdump +src/arctic/handle_syscalls.nim +syscalls.json + +# Documentation artefacts +doc/documentation.html +doc/documentation.tex +doc/documentation.pdf + +# LaTeX garbage +*.log +*.aux +*.out diff --git a/arctic.nimble b/arctic.nimble index 5f8a450..50792ac 100644 --- a/arctic.nimble +++ b/arctic.nimble @@ -15,5 +15,12 @@ requires "nim >= 2.0.0" requires "bio" requires "itertools" +before build: + exec "./generate_handle_syscalls.sh" + task docs, "docs": exec "nim doc --project --index:on --outdir:src/htmldocs src/arctic.nim" + exec "multimarkdown -t html doc/documentation.md -o doc/documentation.html" + exec "multimarkdown -t latex doc/documentation.md -o doc/documentation.tex" + exec "sed -i 's/^\\[/{\\[}/' doc/documentation.tex" + exec "cd doc && xelatex -interaction=batchmode documentation.tex" diff --git a/doc/arctic-begin.tex b/doc/arctic-begin.tex new file mode 100644 index 0000000..0a6cf14 --- /dev/null +++ b/doc/arctic-begin.tex @@ -0,0 +1 @@ +\begin{document} diff --git a/doc/arctic-footer.tex b/doc/arctic-footer.tex new file mode 100644 index 0000000..6b47932 --- /dev/null +++ b/doc/arctic-footer.tex @@ -0,0 +1 @@ +\end{document} diff --git a/doc/arctic-leader.tex b/doc/arctic-leader.tex new file mode 100644 index 0000000..a015c1a --- /dev/null +++ b/doc/arctic-leader.tex @@ -0,0 +1,12 @@ +\documentclass{article} + +\usepackage{hyperref} +\usepackage{tabularx} +\usepackage{tabulary} +\usepackage{booktabs} +\usepackage[normalem]{ulem} +\usepackage{glossaries} +\usepackage{soul} + +\setcounter{secnumdepth}{0} +%\renewcommand{\chapternumberline}[1]{}% Gobble chapter numbers in TOC diff --git a/doc/documentation.md b/doc/documentation.md index 13c0873..02e218b 100644 --- a/doc/documentation.md +++ b/doc/documentation.md @@ -1,7 +1,12 @@ --- Title: ARCTIC Documentation Author: Louis A. Burke +Language: en CSS: documentation.css +LaTeX Leader: arctic-leader.tex +LaTeX Begin: arctic-begin.tex +LaTeX Footer: arctic-footer.tex +LaTeX Header Level: 3 --- # ARCTIC Code @@ -343,7 +348,7 @@ instruction, indexed by an immediate integer based on the following table: Immediate | Operation -----------|-------------------------------------------------------------------- - 0 | ??? + 0 | ??? ### Type Conversions @@ -569,7 +574,7 @@ $ |VALU| … | ⇒ | … \* | load constant value ### Summary - | -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7 + . | -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7 ---:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------: 000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI` 001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F BMIS` @@ -579,7 +584,7 @@ $ |VALU| … | ⇒ | … \* | load constant value 101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC` 110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP` 111-|`u USHR`|`v FABS`|`w SDUP`|`x GETX`|`y GETY`|`z GETZ`|`_ INEG`|`. ITOF` -200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU` +200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`\| IABS`|`$ VALU` 201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN` 210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ ROTR`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL` 211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BIFC`|`␣ NOP`|`# COMM`|`¶ BEAT` diff --git a/generate_handle_syscalls.jq b/generate_handle_syscalls.jq new file mode 100644 index 0000000..eaf35e8 --- /dev/null +++ b/generate_handle_syscalls.jq @@ -0,0 +1,147 @@ +# run this as: jq -r -f SELF syscalls.json +# download syscalls.json from https://syscalls.mebeim.net/db/x86/64/x64/v6.6/table.json + +def asargs: join(", "); +def csig: "\(.name)(\(.signature | asargs))"; +def indent(w): w + (split("\n") | join("\n" + w)); +def getargs: .signature | map(capture("(?.*[^a-zA-Z_0-9]+)(?[a-zA-Z_0-9]*)")); +def strim: sub("^[[:space:]]+"; "") | sub("[[:space:]]+$"; ""); +def rawtype: gsub("const"; "") | strim; + +# returns false if the given type may contain or be a pointer +def isflat: [.] | inside([ + "char", + "unsigned char", + "int", + "struct cachestat_range", + "struct stat", + "struct pollfd", + "sigset_t", + "fd_set", + "struct __kernel_old_timeval", + "struct __kernel_old_itimerval", + "struct __kernel_timespec", + "struct sockaddr", + "void", + "loff_t", + "struct rusage", + "struct new_utsname", + "struct sembuf", + "struct msqid_ds", # unused pointers + "struct linux_dirent", + "struct timezone", + "struct rlimit", + "struct sysinfo", + "struct tms", + "gid_t", + "uid_t", + "siginfo_t", + "struct utimbuf", + "struct ustat", + "struct statfs", + "struct sched_param", + "struct __kernel_timex", + "__kernel_old_time_t", + "u32", + "unsigned long", + "aio_context_t", + "struct io_event", + "struct iocb", + "struct linux_dirent64", + "timer_t", + "struct __kernel_itimerspec", + "struct epoll_event", + "struct mq_attr", + "unsigned int", + "struct siginfo", + "size_t", + "struct perf_event_attr", + "struct rlimit64", + "struct file_handle", + "struct getcpu_cache", + "struct sched_attr", + "union bpf_attr", + "struct statx", + "struct __aio_sigset", + "struct rseq", + "struct io_uring_params", + "struct clone_args", + "struct open_how", + "struct mount_attr", + "struct landlock_ruleset_attr", + "struct futex_waitv", + "" +]); + +# returns true if the given type definitely contains a pointer +def isntflat: [.] | inside([ + "struct sigaction", + "struct iovec", + "struct shmid_ds", + "struct user_msghdr", + "struct msgbuf", + "stack_t", + "struct sigevent", + "struct kexec_segment", + "struct robust_list_head", + "struct mmsghdr", + "" +]); + +def nimtype: rawtype | + if . == "" then null + + # pointers + elif endswith("*") then rtrimstr("*") | strim | + if isflat then "pointer" + elif isntflat then "pointer #[ TODO \(.) is not a flat type ]#" + elif endswith("*") then "pointer #[ TODO \(.) is a nested pointer ]#" + else error("\(.) is of dubious flatness!") end + + # scalar types + elif startswith("enum") then "cint" + elif [.] | inside(["int", "key_t", "pid_t", "clockid_t", "timer_t", "mqd_t", "__s32", "rwf_t"]) then "cint" + elif [.] | inside(["size_t"]) then "csize_t" + elif [.] | inside(["umode_t"]) then "cushort" + elif [.] | inside(["unsigned int", "uid_t", "gid_t", "qid_t", "u32", "__u32"]) then "cuint" + elif [.] | inside(["unsigned long", "off_t"]) then "clong" + elif [.] | inside(["aio_context_t", "__u64"]) then "culong" + elif [.] | inside(["loff_t"]) then "clonglong" + elif [.] | inside(["key_serial_t"]) then "int32" + elif [.] | inside(["cap_user_header_t", "cap_user_data_t"]) then "pointer" + else error("\(.) is unknown as a nimtype") end; + +def nimstack: rawtype | + if . == "" then null + + # pointers + elif endswith("*") then rtrimstr("*") | strim | + if isflat then "state.memory.raw_address(state.stack.pop.p)" + elif isntflat then "nil # TODO \(.) is not a flat type" + elif endswith("*") then "nil # TODO \(.) is a nested pointer" + else error("\(.) is of dubious flatness!") end + + # scalar types + elif startswith("enum") then "state.stack.pop.i.cint" + elif [.] | inside(["int", "key_t", "pid_t", "clockid_t", "timer_t", "mqd_t", "__s32", "rwf_t"]) then "state.stack.pop.i.cint" + elif [.] | inside(["size_t"]) then "state.stack.pop.u.csize_t" + elif [.] | inside(["umode_t"]) then "state.stack.pop.i.cushort" + elif [.] | inside(["unsigned int", "uid_t", "gid_t", "qid_t", "u32", "__u32"]) then "state.stack.pop.u.cuint" + elif [.] | inside(["unsigned long", "off_t"]) then "state.stack.pop.i.clong" + elif [.] | inside(["aio_context_t", "__u64"]) then "state.stack.pop.u.culong" + elif [.] | inside(["loff_t"]) then "state.stack.pop.i.clonglong" + elif [.] | inside(["key_serial_t"]) then "state.stack.pop.i.int32" + elif [.] | inside(["cap_user_header_t", "cap_user_data_t"]) then "state.memory.raw_address(state.stack.pop.p)" + else error("\(.) is unknown as a nimtype") end; + + +def letstr: getargs | map("let syscall_\(.name | gsub("_"; "")): \(.type | nimtype) = \(.type | nimstack)") | join("\n"); + +def argcomma: if ( getargs | length ) == 0 then "" else ", " end; +def argstr: getargs | map("syscall_" + (.name | gsub("_"; ""))) | join(", "); + +.syscalls[] | + " of \(.number): # \(csig)\n" + + "\(letstr | indent(" "))\n" + + " let retval: clong = syscall(\(.number)\(argcomma)\(argstr))\n" + + " state.stack.add ArcticType(i: retval.int64)\n" diff --git a/generate_handle_syscalls.sh b/generate_handle_syscalls.sh new file mode 100755 index 0000000..7f4e807 --- /dev/null +++ b/generate_handle_syscalls.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +if [ -e src/arctic/handle_syscalls.nim ]; then + exit 0 +fi + +if [ ! -e syscalls.json ]; then + curl 'https://syscalls.mebeim.net/db/x86/64/x64/v6.6/table.json' > syscalls.json +fi + +cat < src/arctic/handle_syscalls.nim +import types +import memory + +proc syscall(number: clong): clong {.importcpp: "syscall(@)", header: "", varargs.} + +template handle_syscall*(state: var ArcticState, number: int64) = + case number: +EOF + +jq -r -f generate_handle_syscalls.jq < syscalls.json >> src/arctic/handle_syscalls.nim + +cat <> src/arctic/handle_syscalls.nim + else: + # TODO: invalid syscall + discard +EOF diff --git a/samples/hello.ctc b/samples/hello.ctc index cb63bb0..320d15d 100644 --- a/samples/hello.ctc +++ b/samples/hello.ctc @@ -3,8 +3,7 @@ @main "data" -:message '"Hello, World!\x00"' +:message '"Hello, World!\x0A"' "code" -:main T3 kk # main is called as: &argv[0] argc retaddr -$message `putstrln $0 ; +:main $15 $message $1 `1 $0 ; diff --git a/src/arctic.nim b/src/arctic.nim index 439548c..adc3ccc 100644 --- a/src/arctic.nim +++ b/src/arctic.nim @@ -14,18 +14,33 @@ macro arctic_builtin(builtin: typed): untyped = `builtin` DefaultBuiltins[`namestr`] = `nameid` +macro arctic_builtin_secret(builtin: typed): untyped = + let nameid = builtin.name + let namestr = newLit(nameid.strVal & "_") + + result = quote do: + `builtin` + DefaultBuiltins[`namestr`] = `nameid` + proc putstrln(state: var ArcticState): ArcticStepResult {.arctic_builtin.} = var p = state.stack.pop.p while state.memory[p] != 0: - stdout.write(state.memory[p]) + stdout.write(state.memory[p].char) + inc p stdout.write('\n') return CONTINUE +proc debugdump(state: var ArcticState): ArcticStepResult {.arctic_builtin_secret.} = + echo state + return CONTINUE + proc parseCmdLine(): (File, seq[string]) = - let argv = commandLineParams() + var argv = commandLineParams() if len(argv) > 0: - return (open(argv[0]), argv[1..^0]) + let fname = argv[0] + argv.delete(0) + return (open(fname), argv) else: return (stdin, @[]) @@ -47,9 +62,9 @@ when isMainModule: for (i, a) in enumerate(argv): state.memory.write(x + 8 * i, cast[uint64](a.int64).serialize(littleEndian)) state.memory.write(x + 8 * args.len, 0.uint64.serialize(littleEndian)) - state.stack.add ArcticType(i: 0) # argv + state.stack.add ArcticType(i: x) # argv state.stack.add ArcticType(i: args.len) # argc - state.stack.add ArcticType(i: 0) # return address: null + state.stack.add ArcticType(i: -1) # return address: special "exit" while true: case state.step(builtins): diff --git a/src/arctic/load.nim b/src/arctic/load.nim index 5f2b3fe..262c34b 100644 --- a/src/arctic/load.nim +++ b/src/arctic/load.nim @@ -128,7 +128,8 @@ func load*(code: string): ArcticState = of ImmediateOps, '#', ':', '\"', '\'': token &= next of PlainOps, '\n': - result.code.add(section, raw_op(next)) + if section.iscode: + result.code.add(section, raw_op(next)) else: discard else: @@ -136,7 +137,8 @@ func load*(code: string): ArcticState = of '#': # comment if next == '\n': token = "" - result.code.add(section, raw_op(next)) + if section.iscode: + result.code.add(section, raw_op(next)) of '"': # section switch if next == '"' and token[^1] != '\\': @@ -151,7 +153,8 @@ func load*(code: string): ArcticState = if section.iscode: result.symbols[idx] = result.code.current(section) else: - result.symbols[idx] = result.code.current(section) + result.memory.register(section) + result.symbols[idx] = result.memory.current(section) token = "" else: token.add next @@ -174,17 +177,19 @@ func load*(code: string): ArcticState = if quoted or escaped: token.add next else: - result.memory.add(section, parse_data(token[1..^1])) + if not section.iscode: + result.memory.add(section, parse_data(token[1..^1])) token = "" else: token.add next of ImmediateOps: if next in " \n": - result.code.add(section, ArcticOperation( - code: token[0], - immediate: parse_immediate(token[1..^1], section), - bigendian: isbigendian(section))) + if section.iscode: + result.code.add(section, ArcticOperation( + code: token[0], + immediate: parse_immediate(token[1..^1], section), + bigendian: isbigendian(section))) token = "" else: token.add next diff --git a/src/arctic/memory.nim b/src/arctic/memory.nim index 37eb185..2627e06 100644 --- a/src/arctic/memory.nim +++ b/src/arctic/memory.nim @@ -49,6 +49,13 @@ proc van_der_corput(n: int): int = return q +proc register*[T](memory: var Memory[T], section: string) = + ## Registers a section as existing in this memory space, without adding any + ## data to it + if not (section in memory.sects): + memory.sects[section] = memory.chunks.len + memory.chunks.add @[] + proc add*[T](memory: var Memory[T], section: string, value: T) = ## Adds value to the end of memory in section. if not (section in memory.sects): @@ -106,6 +113,24 @@ proc contains*[T](memory: Memory[T], address: int): bool = let (blkid, offset) = divmod(address - HUG_BLOCK_START, HUG_BLOCK_SIZE) return memory.hugmem[blkid].len >= offset +proc raw_address*[T](memory: var Memory[T], address: int): ptr T = + ## Returns the "raw" address of a given section and offset + if address < SML_BLOCK_START: + let (idx, off) = memory.addrinfo(address) + return addr memory.chunks[idx][off] + elif address < MID_BLOCK_START: + let (blkid, offset) = divmod(address - SML_BLOCK_START, SML_BLOCK_SIZE) + return addr memory.smlmem[blkid][offset] + elif address < BIG_BLOCK_START: + let (blkid, offset) = divmod(address - MID_BLOCK_START, MID_BLOCK_SIZE) + return addr memory.midmem[blkid][offset] + elif address < HUG_BLOCK_START: + let (blkid, offset) = divmod(address - BIG_BLOCK_START, BIG_BLOCK_SIZE) + return addr memory.bigmem[blkid][offset] + else: + let (blkid, offset) = divmod(address - HUG_BLOCK_START, HUG_BLOCK_SIZE) + return addr memory.hugmem[blkid][offset] + proc `[]`*[T](memory: Memory[T], address: int): T = ## Accesses the underlying memory item for the given address, failing if it ## does not exist diff --git a/src/arctic/step.nim b/src/arctic/step.nim index 0c2bcdb..5fe413d 100644 --- a/src/arctic/step.nim +++ b/src/arctic/step.nim @@ -2,6 +2,7 @@ import std/[bitops, critbits, math] import memory import types +import handle_syscalls proc branch(state: var ArcticState, count: int) = var n = 0 @@ -16,10 +17,13 @@ proc branch(state: var ArcticState, count: int) = if state.code[state.pc].code == '\n': n.dec + proc step*(state: var ArcticState, builtins: CritBitTree[ArcticBuiltin]): ArcticStepResult = if not (state.pc in state.code): return EXIT + if state.pc == 0: + return EXIT let op = state.code[state.pc] state.pc.inc @@ -723,7 +727,7 @@ proc step*(state: var ArcticState, builtins: CritBitTree[ArcticBuiltin]): Arctic of VARIABLE: discard of INTEGER: - discard # TODO: integer "syscalls" (probably just syscall(n, ...) + state.handle_syscall(op.immediate.i) of NUMBER: discard of SYMBOL: diff --git a/src/arctic/types.nim b/src/arctic/types.nim index 8aede94..99d70d0 100644 --- a/src/arctic/types.nim +++ b/src/arctic/types.nim @@ -52,6 +52,10 @@ type ArcticBuiltin* = proc (state: var ArcticState): ArcticStepResult {.nimcall.} +proc `$`*(value: ArcticType): string = + return &"{value.u:016X}" + # return &"(b:0x{value.b:2X}/{value.b}/'{value.b.char}',s:0x{value.s:4X}/{value.s},w:0x{value.w:8X}/{value.w},i:{value.i},u:{value.u},d:{value.d},p/f:{value.u:016X})" + proc `$`*(variable: ArcticVariableIndex): string = case variable: of VARIABLE_A: return "A" @@ -78,33 +82,35 @@ proc `$`*(state: ArcticState): string = for idx in ArcticVariableIndex: result &= &" {idx} = {state.registers[idx]}\n" - result &= "Stack:\n" + result &= "Stack (upside-down):\n" for item in state.stack: result &= &" {item}\n" - result &= "Sections:\n" + result &= "Code:\n" for section in state.code.sections: result &= "\n section " - result &= section + result &= &"«{section}»" result &= ":\n" for (i, m) in state.code.data(section).pairs: for (label, location) in state.symbols.pairs: - if -location == state.code.address(section, i): - result &= &":{label} " - if state.pc == state.code.address(section, i): - result &= ">" + if location == state.code.address(section, i): + result &= &"«:{label}@{location:016x}» " + if state.pc == state.code.address(section, i): + result &= "⋄" if m.code != ' ': - result &= $m + result &= &"{m} " + + result &= "\nData:\n" for section in state.memory.sections: result &= "\n section " - result &= section + result &= &"«{section}»" result &= ":\n" for (i, m) in state.memory.data(section).pairs: for (label, location) in state.symbols.pairs: if location == state.memory.address(section, i): - result &= &":{label} " - result &= &"{m} " + result &= &"«:{label}@{location:016x}» " + result &= &"{m:02x}"