Implemented syscalls

This commit is contained in:
Louis Burke 2024-01-03 19:06:22 -05:00
parent 2836e25e65
commit c1e7ecec44
14 changed files with 298 additions and 31 deletions

13
.gitignore vendored
View file

@ -1,3 +1,16 @@
build/
/arctic
htmldocs
acdump
src/arctic/handle_syscalls.nim
syscalls.json
# Documentation artefacts
doc/documentation.html
doc/documentation.tex
doc/documentation.pdf
# LaTeX garbage
*.log
*.aux
*.out

View file

@ -15,5 +15,12 @@ requires "nim >= 2.0.0"
requires "bio"
requires "itertools"
before build:
  # Run the syscall-handler generator script before every build.
  exec "./generate_handle_syscalls.sh"
task docs, "docs":
  # API documentation for the Nim sources, written to src/htmldocs.
  exec "nim doc --project --index:on --outdir:src/htmldocs src/arctic.nim"
  # Render the MultiMarkdown manual to HTML and to LaTeX.
  exec "multimarkdown -t html doc/documentation.md -o doc/documentation.html"
  exec "multimarkdown -t latex doc/documentation.md -o doc/documentation.tex"
  # Rewrite a "[" at the start of a line in the generated LaTeX as "{[}".
  # NOTE(review): presumably so LaTeX does not read it as an optional
  # argument - confirm.
  exec "sed -i 's/^\\[/{\\[}/' doc/documentation.tex"
  # Typeset the LaTeX output from inside doc/ in batch mode.
  exec "cd doc && xelatex -interaction=batchmode documentation.tex"

1
doc/arctic-begin.tex Normal file
View file

@ -0,0 +1 @@
\begin{document}

1
doc/arctic-footer.tex Normal file
View file

@ -0,0 +1 @@
\end{document}

12
doc/arctic-leader.tex Normal file
View file

@ -0,0 +1,12 @@
\documentclass{article}
\usepackage{hyperref}
\usepackage{tabularx}
\usepackage{tabulary}
\usepackage{booktabs}
\usepackage[normalem]{ulem}
\usepackage{glossaries}
\usepackage{soul}
\setcounter{secnumdepth}{0}
%\renewcommand{\chapternumberline}[1]{}% Gobble chapter numbers in TOC

View file

@ -1,7 +1,12 @@
---
Title: ARCTIC Documentation
Author: Louis A. Burke
Language: en
CSS: documentation.css
LaTeX Leader: arctic-leader.tex
LaTeX Begin: arctic-begin.tex
LaTeX Footer: arctic-footer.tex
LaTeX Header Level: 3
---
# ARCTIC Code
@ -343,7 +348,7 @@ instruction, indexed by an immediate integer based on the following table:
Immediate | Operation
-----------|--------------------------------------------------------------------
0 | ???
0 | ???
### Type Conversions
@ -569,7 +574,7 @@ $ |VALU| … | ⇒ | … \* | load constant value
### Summary
| -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7
. | -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7
---:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:
000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI`
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F BMIS`
@ -579,7 +584,7 @@ $ |VALU| … | ⇒ | … \* | load constant value
101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC`
110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP`
111-|`u USHR`|`v FABS`|`w SDUP`|`x GETX`|`y GETY`|`z GETZ`|`_ INEG`|`. ITOF`
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU`
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`\| IABS`|`$ VALU`
201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN`
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ ROTR`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BIFC`|`␣ NOP`|`# COMM`|`¶ BEAT`

147
generate_handle_syscalls.jq Normal file
View file

@ -0,0 +1,147 @@
# run this as: jq -r -f SELF syscalls.json
# download syscalls.json from https://syscalls.mebeim.net/db/x86/64/x64/v6.6/table.json
# Joins an array of strings into one comma-separated argument list.
def asargs: join(", ");
# Renders a syscall entry as a C-style signature, "name(arg, arg, ...)",
# from its .name and .signature fields.
def csig: "\(.name)(\(.signature | asargs))";
# Prefixes every line of the input string with w.
def indent(w): w + (split("\n") | join("\n" + w));
# Splits each .signature entry into {type, name}: name is the trailing run of
# identifier characters (possibly empty); type is everything before it and
# must end in at least one non-identifier character.
def getargs: .signature | map(capture("(?<type>.*[^a-zA-Z_0-9]+)(?<name>[a-zA-Z_0-9]*)"));
# Strips leading and trailing whitespace from the input string.
def strim: sub("^[[:space:]]+"; "") | sub("[[:space:]]+$"; "");
# Removes every occurrence of the text "const" from a C type, then trims it.
def rawtype: gsub("const"; "") | strim;
# True when the input (a pointee type, i.e. a pointer type with its trailing
# "*" already removed by the caller) matches an entry of the list below of
# types treated as flat. False means the type may contain or be a pointer.
# NOTE: inside/contains compares strings by substring, so an input that is a
# substring of any listed entry (including the empty string) also yields true.
def isflat: [.] | inside([
"char",
"unsigned char",
"int",
"struct cachestat_range",
"struct stat",
"struct pollfd",
"sigset_t",
"fd_set",
"struct __kernel_old_timeval",
"struct __kernel_old_itimerval",
"struct __kernel_timespec",
"struct sockaddr",
"void",
"loff_t",
"struct rusage",
"struct new_utsname",
"struct sembuf",
"struct msqid_ds", # unused pointers
"struct linux_dirent",
"struct timezone",
"struct rlimit",
"struct sysinfo",
"struct tms",
"gid_t",
"uid_t",
"siginfo_t",
"struct utimbuf",
"struct ustat",
"struct statfs",
"struct sched_param",
"struct __kernel_timex",
"__kernel_old_time_t",
"u32",
"unsigned long",
"aio_context_t",
"struct io_event",
"struct iocb",
"struct linux_dirent64",
"timer_t",
"struct __kernel_itimerspec",
"struct epoll_event",
"struct mq_attr",
"unsigned int",
"struct siginfo",
"size_t",
"struct perf_event_attr",
"struct rlimit64",
"struct file_handle",
"struct getcpu_cache",
"struct sched_attr",
"union bpf_attr",
"struct statx",
"struct __aio_sigset",
"struct rseq",
"struct io_uring_params",
"struct clone_args",
"struct open_how",
"struct mount_attr",
"struct landlock_ruleset_attr",
"struct futex_waitv",
""
]);
# True when the input pointee type matches an entry of the list below of types
# that definitely contain a pointer.
# NOTE: inside/contains compares strings by substring, so an input that is a
# substring of any listed entry (including the empty string) also yields true.
def isntflat: [.] | inside([
"struct sigaction",
"struct iovec",
"struct shmid_ds",
"struct user_msghdr",
"struct msgbuf",
"stack_t",
"struct sigevent",
"struct kexec_segment",
"struct robust_list_head",
"struct mmsghdr",
""
]);
# Maps a C parameter type to the Nim type text used in the generated `let`
# declaration. The input is first normalised with rawtype.
#   - empty type            -> null
#   - pointer to flat type  -> "pointer"
#   - other known pointers  -> "pointer" followed by a TODO block comment
#   - unknown pointee       -> error
#   - known scalar          -> the matching Nim C-interop type
#   - anything else         -> error
# Branches are tried top to bottom, so the first matching list wins.
# NOTE(review): inside matches by substring, so shorter spellings such as
# "long" or "unsigned" presumably resolve through the longer list entries -
# confirm before replacing inside with an exact comparison.
def nimtype: rawtype |
if . == "" then null
# pointers
elif endswith("*") then rtrimstr("*") | strim |
if isflat then "pointer"
elif isntflat then "pointer #[ TODO \(.) is not a flat type ]#"
elif endswith("*") then "pointer #[ TODO \(.) is a nested pointer ]#"
else error("\(.) is of dubious flatness!") end
# scalar types
elif startswith("enum") then "cint"
elif [.] | inside(["int", "key_t", "pid_t", "clockid_t", "timer_t", "mqd_t", "__s32", "rwf_t"]) then "cint"
elif [.] | inside(["size_t"]) then "csize_t"
elif [.] | inside(["umode_t"]) then "cushort"
elif [.] | inside(["unsigned int", "uid_t", "gid_t", "qid_t", "u32", "__u32"]) then "cuint"
elif [.] | inside(["unsigned long", "off_t"]) then "clong"
elif [.] | inside(["aio_context_t", "__u64"]) then "culong"
elif [.] | inside(["loff_t"]) then "clonglong"
elif [.] | inside(["key_serial_t"]) then "int32"
elif [.] | inside(["cap_user_header_t", "cap_user_data_t"]) then "pointer"
else error("\(.) is unknown as a nimtype") end;
# Maps a C parameter type to the Nim expression that pops one value off the
# interpreter stack and converts it for the syscall. The input is first
# normalised with rawtype, and the branch structure parallels nimtype:
#   - empty type            -> null
#   - pointer to flat type  -> popped address translated with raw_address
#   - other known pointers  -> nil plus a TODO line comment (nothing is popped)
#   - unknown pointee       -> error
#   - known scalar          -> pop, pick the i or u field, convert to the C type
#   - anything else         -> error
# NOTE(review): the final error text says "nimtype" although it is raised
# from nimstack.
def nimstack: rawtype |
if . == "" then null
# pointers
elif endswith("*") then rtrimstr("*") | strim |
if isflat then "state.memory.raw_address(state.stack.pop.p)"
elif isntflat then "nil # TODO \(.) is not a flat type"
elif endswith("*") then "nil # TODO \(.) is a nested pointer"
else error("\(.) is of dubious flatness!") end
# scalar types
elif startswith("enum") then "state.stack.pop.i.cint"
elif [.] | inside(["int", "key_t", "pid_t", "clockid_t", "timer_t", "mqd_t", "__s32", "rwf_t"]) then "state.stack.pop.i.cint"
elif [.] | inside(["size_t"]) then "state.stack.pop.u.csize_t"
elif [.] | inside(["umode_t"]) then "state.stack.pop.i.cushort"
elif [.] | inside(["unsigned int", "uid_t", "gid_t", "qid_t", "u32", "__u32"]) then "state.stack.pop.u.cuint"
elif [.] | inside(["unsigned long", "off_t"]) then "state.stack.pop.i.clong"
elif [.] | inside(["aio_context_t", "__u64"]) then "state.stack.pop.u.culong"
elif [.] | inside(["loff_t"]) then "state.stack.pop.i.clonglong"
elif [.] | inside(["key_serial_t"]) then "state.stack.pop.i.int32"
elif [.] | inside(["cap_user_header_t", "cap_user_data_t"]) then "state.memory.raw_address(state.stack.pop.p)"
else error("\(.) is unknown as a nimtype") end;
# One `let syscall_<name>: <nimtype> = <nimstack>` line per argument, in
# signature order, with underscores removed from the argument name. Because
# the lines run in order, the first argument is the first value popped.
def letstr: getargs | map("let syscall_\(.name | gsub("_"; "")): \(.type | nimtype) = \(.type | nimstack)") | join("\n");
# ", " when the syscall has at least one parsed argument, "" otherwise; it
# separates the syscall number from the argument list in the generated call.
def argcomma: if ( getargs | length ) == 0 then "" else ", " end;
# Comma-separated list of the syscall_<name> identifiers declared by letstr.
def argstr: getargs | map("syscall_" + (.name | gsub("_"; ""))) | join(", ");
# Main filter: for every entry of .syscalls emit an `of <number>:` branch that
# declares the arguments, invokes syscall() and pushes the result on the stack.
.syscalls[] |
" of \(.number): # \(csig)\n" +
"\(letstr | indent(" "))\n" +
" let retval: clong = syscall(\(.number)\(argcomma)\(argstr))\n" +
" state.stack.add ArcticType(i: retval.int64)\n"

27
generate_handle_syscalls.sh Executable file
View file

@ -0,0 +1,27 @@
#!/bin/bash
# Generates src/arctic/handle_syscalls.nim from the syscall table in
# syscalls.json by wrapping the output of generate_handle_syscalls.jq in a
# fixed Nim prologue and epilogue.
#
# The script does nothing when the generated file already exists, and it only
# downloads the table when syscalls.json is missing. Both files are written
# under a temporary name and renamed into place on success, so a failed
# download or a failed jq run never leaves a truncated file behind that a
# later run would mistake for a finished one.
set -euo pipefail

out=src/arctic/handle_syscalls.nim
table=syscalls.json
url='https://syscalls.mebeim.net/db/x86/64/x64/v6.6/table.json'

# Already generated: nothing to do.
if [ -e "$out" ]; then
  exit 0
fi

tmp_out="$out.tmp.$$"
tmp_table="$table.tmp.$$"
# Remove leftovers on every exit path; after a successful rename the
# temporary names no longer exist and rm -f ignores them.
trap 'rm -f "$tmp_out" "$tmp_table"' EXIT

if [ ! -e "$table" ]; then
  # --fail turns an HTTP error into a non-zero exit status instead of saving
  # the server's error page as the table.
  curl --fail "$url" > "$tmp_table"
  mv "$tmp_table" "$table"
fi

# The quoted delimiter copies the Nim text verbatim, without shell expansion.
cat <<'EOF' > "$tmp_out"
import types
import memory
proc syscall(number: clong): clong {.importcpp: "syscall(@)", header: "<unistd.h>", varargs.}
template handle_syscall*(state: var ArcticState, number: int64) =
case number:
EOF
jq -r -f generate_handle_syscalls.jq < "$table" >> "$tmp_out"
cat <<'EOF' >> "$tmp_out"
else:
# TODO: invalid syscall
discard
EOF

# Publish the finished file in one step.
mv "$tmp_out" "$out"

View file

@ -3,8 +3,7 @@
@main
"data"
:message '"Hello, World!\x00"'
:message '"Hello, World!\x0A"'
"code"
:main T3 kk # main is called as: &argv[0] argc retaddr
$message `putstrln $0 ;
:main $15 $message $1 `1 $0 ;

View file

@ -14,18 +14,33 @@ macro arctic_builtin(builtin: typed): untyped =
`builtin`
DefaultBuiltins[`namestr`] = `nameid`
macro arctic_builtin_secret(builtin: typed): untyped =
  ## Pragma macro: emits the annotated proc unchanged, then registers it in
  ## `DefaultBuiltins` under the proc's own name with a trailing underscore
  ## appended.
  let procSym = builtin.name
  let registryKey = newLit(procSym.strVal & "_")
  result = quote do:
    `builtin`
    DefaultBuiltins[`registryKey`] = `procSym`
proc putstrln(state: var ArcticState): ArcticStepResult {.arctic_builtin.} =
var p = state.stack.pop.p
while state.memory[p] != 0:
stdout.write(state.memory[p])
stdout.write(state.memory[p].char)
inc p
stdout.write('\n')
return CONTINUE
proc debugdump(state: var ArcticState): ArcticStepResult {.arctic_builtin_secret.} =
  ## Writes the string form of the whole interpreter state to stdout and
  ## lets execution continue.
  echo $state
  result = CONTINUE
proc parseCmdLine(): (File, seq[string]) =
let argv = commandLineParams()
var argv = commandLineParams()
if len(argv) > 0:
return (open(argv[0]), argv[1..^0])
let fname = argv[0]
argv.delete(0)
return (open(fname), argv)
else:
return (stdin, @[])
@ -47,9 +62,9 @@ when isMainModule:
for (i, a) in enumerate(argv):
state.memory.write(x + 8 * i, cast[uint64](a.int64).serialize(littleEndian))
state.memory.write(x + 8 * args.len, 0.uint64.serialize(littleEndian))
state.stack.add ArcticType(i: 0) # argv
state.stack.add ArcticType(i: x) # argv
state.stack.add ArcticType(i: args.len) # argc
state.stack.add ArcticType(i: 0) # return address: null
state.stack.add ArcticType(i: -1) # return address: special "exit"
while true:
case state.step(builtins):

View file

@ -128,7 +128,8 @@ func load*(code: string): ArcticState =
of ImmediateOps, '#', ':', '\"', '\'':
token &= next
of PlainOps, '\n':
result.code.add(section, raw_op(next))
if section.iscode:
result.code.add(section, raw_op(next))
else:
discard
else:
@ -136,7 +137,8 @@ func load*(code: string): ArcticState =
of '#': # comment
if next == '\n':
token = ""
result.code.add(section, raw_op(next))
if section.iscode:
result.code.add(section, raw_op(next))
of '"': # section switch
if next == '"' and token[^1] != '\\':
@ -151,7 +153,8 @@ func load*(code: string): ArcticState =
if section.iscode:
result.symbols[idx] = result.code.current(section)
else:
result.symbols[idx] = result.code.current(section)
result.memory.register(section)
result.symbols[idx] = result.memory.current(section)
token = ""
else:
token.add next
@ -174,17 +177,19 @@ func load*(code: string): ArcticState =
if quoted or escaped:
token.add next
else:
result.memory.add(section, parse_data(token[1..^1]))
if not section.iscode:
result.memory.add(section, parse_data(token[1..^1]))
token = ""
else:
token.add next
of ImmediateOps:
if next in " \n":
result.code.add(section, ArcticOperation(
code: token[0],
immediate: parse_immediate(token[1..^1], section),
bigendian: isbigendian(section)))
if section.iscode:
result.code.add(section, ArcticOperation(
code: token[0],
immediate: parse_immediate(token[1..^1], section),
bigendian: isbigendian(section)))
token = ""
else:
token.add next

View file

@ -49,6 +49,13 @@ proc van_der_corput(n: int): int =
return q
proc register*[T](memory: var Memory[T], section: string) =
  ## Makes sure `section` exists in this memory space without storing any
  ## data in it. An unknown section is given the next chunk index together
  ## with an empty chunk; a section that is already known is left untouched.
  if section in memory.sects:
    return
  memory.sects[section] = memory.chunks.len
  memory.chunks.add @[]
proc add*[T](memory: var Memory[T], section: string, value: T) =
## Adds value to the end of memory in section.
if not (section in memory.sects):
@ -106,6 +113,24 @@ proc contains*[T](memory: Memory[T], address: int): bool =
let (blkid, offset) = divmod(address - HUG_BLOCK_START, HUG_BLOCK_SIZE)
return memory.hugmem[blkid].len >= offset
proc raw_address*[T](memory: var Memory[T], address: int): ptr T =
  ## Returns a raw pointer to the item stored at `address`.
  ##
  ## Addresses below `SML_BLOCK_START` are resolved through `addrinfo` into
  ## the section chunks. Higher addresses are looked up in the small, mid,
  ## big or huge block table, chosen by the range the address falls in.
  ## NOTE(review): the pointer is taken with `addr` on the backing container,
  ## so it presumably stops being valid once that container grows - confirm.
  if address < SML_BLOCK_START:
    let (chunkIdx, within) = memory.addrinfo(address)
    return addr memory.chunks[chunkIdx][within]
  if address < MID_BLOCK_START:
    let (blockIdx, within) = divmod(address - SML_BLOCK_START, SML_BLOCK_SIZE)
    return addr memory.smlmem[blockIdx][within]
  if address < BIG_BLOCK_START:
    let (blockIdx, within) = divmod(address - MID_BLOCK_START, MID_BLOCK_SIZE)
    return addr memory.midmem[blockIdx][within]
  if address < HUG_BLOCK_START:
    let (blockIdx, within) = divmod(address - BIG_BLOCK_START, BIG_BLOCK_SIZE)
    return addr memory.bigmem[blockIdx][within]
  let (blockIdx, within) = divmod(address - HUG_BLOCK_START, HUG_BLOCK_SIZE)
  result = addr memory.hugmem[blockIdx][within]
proc `[]`*[T](memory: Memory[T], address: int): T =
## Accesses the underlying memory item for the given address, failing if it
## does not exist

View file

@ -2,6 +2,7 @@ import std/[bitops, critbits, math]
import memory
import types
import handle_syscalls
proc branch(state: var ArcticState, count: int) =
var n = 0
@ -16,10 +17,13 @@ proc branch(state: var ArcticState, count: int) =
if state.code[state.pc].code == '\n':
n.dec
proc step*(state: var ArcticState, builtins: CritBitTree[ArcticBuiltin]): ArcticStepResult =
if not (state.pc in state.code):
return EXIT
if state.pc == 0:
return EXIT
let op = state.code[state.pc]
state.pc.inc
@ -723,7 +727,7 @@ proc step*(state: var ArcticState, builtins: CritBitTree[ArcticBuiltin]): Arctic
of VARIABLE:
discard
of INTEGER:
discard # TODO: integer "syscalls" (probably just syscall(n, ...)
state.handle_syscall(op.immediate.i)
of NUMBER:
discard
of SYMBOL:

View file

@ -52,6 +52,10 @@ type
ArcticBuiltin* = proc (state: var ArcticState): ArcticStepResult {.nimcall.}
proc `$`*(value: ArcticType): string =
  ## Formats the value's `u` field as a zero-padded, 16-digit upper-case
  ## hexadecimal string.
  # Verbose multi-field rendering, kept for debugging:
  # return &"(b:0x{value.b:2X}/{value.b}/'{value.b.char}',s:0x{value.s:4X}/{value.s},w:0x{value.w:8X}/{value.w},i:{value.i},u:{value.u},d:{value.d},p/f:{value.u:016X})"
  result = &"{value.u:016X}"
proc `$`*(variable: ArcticVariableIndex): string =
case variable:
of VARIABLE_A: return "A"
@ -78,33 +82,35 @@ proc `$`*(state: ArcticState): string =
for idx in ArcticVariableIndex:
result &= &" {idx} = {state.registers[idx]}\n"
result &= "Stack:\n"
result &= "Stack (upside-down):\n"
for item in state.stack:
result &= &" {item}\n"
result &= "Sections:\n"
result &= "Code:\n"
for section in state.code.sections:
result &= "\n section "
result &= section
result &= &"«{section}»"
result &= ":\n"
for (i, m) in state.code.data(section).pairs:
for (label, location) in state.symbols.pairs:
if -location == state.code.address(section, i):
result &= &":{label} "
if state.pc == state.code.address(section, i):
result &= ">"
if location == state.code.address(section, i):
result &= &"«:{label}@{location:016x}» "
if state.pc == state.code.address(section, i):
result &= ""
if m.code != ' ':
result &= $m
result &= &"{m} "
result &= "\nData:\n"
for section in state.memory.sections:
result &= "\n section "
result &= section
result &= &"«{section}»"
result &= ":\n"
for (i, m) in state.memory.data(section).pairs:
for (label, location) in state.symbols.pairs:
if location == state.memory.address(section, i):
result &= &":{label} "
result &= &"{m} "
result &= &"«:{label}@{location:016x}» "
result &= &"{m:02x}"