Compare commits

...

10 commits

Author SHA1 Message Date
Louis Burke 7626f83e90 Started work on x86 AHOT 2024-09-23 19:39:53 -04:00
Louis Burke fde87a9aa7 Removed more code artifacts 2024-01-04 19:46:02 -05:00
Louis Burke 222cf580e0 Removed Nim stuff 2024-01-04 09:44:58 -05:00
Louis Burke c1e7ecec44 Implemented syscalls 2024-01-03 19:06:22 -05:00
Louis Burke 2836e25e65 Restructured interpreter 2023-12-25 20:18:08 -05:00
Louis Burke c990114723 Finished rough draft 2023-12-21 20:12:37 -05:00
Louis Burke e2642ec3ac Converted addressing to simple 64-bit integers 2023-12-19 22:42:45 -05:00
Louis Burke d234d5180d Swapped i and w in type 2023-12-19 00:48:15 -05:00
Louis Burke 13f9330eea Implemented many operations 2023-12-19 00:31:53 -05:00
Louis Burke a89fe4f0c6 Finished loading
Need to decide on how to store pointers before continuing.
2023-11-13 20:06:26 -05:00
34 changed files with 1397 additions and 294 deletions

24
.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Local build outputs: the Makefile links /arctic and compiles into obj/.
# NOTE(review): the remaining entries in this first group are presumably
# leftover or generated artifacts - confirm they are still produced.
build/
/arctic
htmldocs
acdump
src/arctic/handle_syscalls.nim
syscalls.json
obj/
# Generated sources
src/arch/x86_64.h
# Documentation artifacts
documentation.html
documentation.tex
documentation.pdf
overview.html
overview.tex
overview.pdf
cheatsheet.pdf
# LaTeX garbage
*.log
*.aux
*.out

91
Makefile Normal file
View file

@ -0,0 +1,91 @@
# Entry points and shared helper variables.
#
# Every command-style target is declared phony so that a stray file named
# `all`, `docs` or `clean` can never make the target look up to date.
.PHONY: default all docs clean
.SUFFIXES:
# Delete a target whose recipe fails, so a partially written file (several
# recipes below write $@ through shell redirection) is not mistaken for an
# up-to-date one on the next run.
.DELETE_ON_ERROR:
MAKEFLAGS += --no-builtin-rules

# Goals that never need the generated dependency files.
NODEPS := clean
OBJECT_DIR := obj
SOURCE_DIR := src

# Literal space and comma, usable as arguments to functions such as $(subst).
EMPTY :=
SPACE := $(EMPTY) $(EMPTY)
COMMA := ,

default: all
all: docs arctic
# Documentation: every Markdown file under doc/ is rendered to an HTML file and
# a PDF file in the repository root; cheatsheet.pdf comes from a hand-written
# doc/cheatsheet.tex.
#
# `:=` makes the `find` run once at parse time instead of on every expansion
# of DOCUMENTS (it is referenced by HTMLDOCS, PDFDOCS and the clean recipe).
DOCUMENTS := $(shell find doc/ -name '*.md')
HTMLDOCS := $(patsubst doc/%.md,%.html,$(DOCUMENTS))
PDFDOCS := $(patsubst doc/%.md,%.pdf,$(DOCUMENTS)) cheatsheet.pdf

docs: $(HTMLDOCS) $(PDFDOCS)

%.html: doc/%.md
	multimarkdown -t html $< -o $@

# The sed step rewrites a `[` at the very start of a line as `{[}`.
%.tex: %.md
	multimarkdown -t latex $< | sed 's/^\[/{[}/g' > $@

# xelatex is run inside the directory of the .tex file ($(<D)) and the result
# is then moved one level up.
# Alternative kept for reference:
# cd doc && latexmk -pdf -xelatex -use-make $*.tex && mv $*.pdf ../
%.pdf: doc/%.tex doc/arctic-leader.tex doc/arctic-begin.tex doc/arctic-footer.tex
	cd $(<D) && xelatex -interaction=batchmode $*.tex && mv $*.pdf ../
# m4-generated sources: each `*.m4` template below $(SOURCE_DIR) expands to a
# file with the same name minus the `.m4` suffix.
GENERATED_M4 = $(shell find $(SOURCE_DIR)/ -name '*.m4')
GENERATED_FS = $(GENERATED_M4:.m4=)

# Regenerate a file whenever its template or the shared macro file changes.
%: %.m4 misc/utils.m4
	m4 $< > $@
# TODO: fix the structure so this is easier. the JIT libs should become ASM
# libs, while the actual JIT sources only get included on whatever the native
# arch is (tunable with ARCH=... in make)
#
# `:=` runs the `find` once at parse time; with `=` it was re-run for every
# expansion of MAIN_SOURCES / MAIN_OBJECTS / MAIN_DEPENDS.
MAIN_SOURCES := $(shell find $(SOURCE_DIR)/ -name '*.c' -not -path '$(SOURCE_DIR)/jit/*')
MAIN_OBJECTS := $(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.o,$(MAIN_SOURCES))
MAIN_DEPENDS := $(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.d,$(MAIN_SOURCES))

# Goals that need the dependency files: everything except the NODEPS goals and
# the print-% debugging helper. An empty command line means the default goal,
# which does need them. Goals are matched word by word (not as substrings), so
# `make clean all` still loads the depends files and `make clean` alone does not.
dep_goals := $(if $(MAKECMDGOALS),$(filter-out $(NODEPS) print-%,$(MAKECMDGOALS)),default)
ifneq (,$(strip $(dep_goals)))
$(info Including depends files: $(MAIN_DEPENDS))
include $(MAIN_DEPENDS)
endif

$(OBJECT_DIR):
	mkdir -p $@

# Dependency file for one source. -MT names the matching object file as the
# target inside the generated rule. The generated sources are order-only
# prerequisites so they exist before the compiler scans the includes.
$(OBJECT_DIR)/%.d: $(SOURCE_DIR)/%.c | $(OBJECT_DIR) $(GENERATED_FS)
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -MM -MT '$(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.o,$<)' $< -MF $@

# Sources are discovered recursively, so the object may live in a
# subdirectory of $(OBJECT_DIR) that has to be created first.
$(OBJECT_DIR)/%.o: $(SOURCE_DIR)/%.c | $(OBJECT_DIR)
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -c $< -o $@

arctic: $(MAIN_OBJECTS)
	$(CC) $(LDFLAGS) $^ -o $@
X86_64_BIT_REGISTERS=RAX RBX RCX RDX RSI RDI RBP RSP R8 R9 R10 R11 R12 R13 R14 R15

# Generates a header with one `#define MOV_<src>_<dst>` per ordered register
# pair. Each "mov src,dst" line is piped through ./tools/x86_64dump; the first
# sed wraps every two characters of that output as ` X(0x..)` and the second
# prefixes the macro name.
#
# The loops iterate over the plain space-separated register list. The previous
# `{A,B,...}` form relied on brace expansion, which is a bash extension: under
# a POSIX /bin/sh (make's default SHELL) each loop ran exactly once with the
# literal braced string.
src/jit/x86_64_mov_opcodes.h:
	echo '#ifndef X86_64_MOV_OPCODES_H' > $@
	echo '#define X86_64_MOV_OPCODES_H' >> $@
	echo >> $@
	for dstreg in $(X86_64_BIT_REGISTERS); do \
		for srcreg in $(X86_64_BIT_REGISTERS); do \
			echo "mov $$srcreg,$$dstreg" | ./tools/x86_64dump | \
				sed 's/../ X(0x&)/g' | \
				sed "s/.*/#define MOV_$${srcreg}_$${dstreg}&/" >> $@; \
		done; \
	done
	echo >> $@
	echo '#endif /* X86_64_MOV_OPCODES_H */' >> $@
# Removes the LaTeX by-products left in doc/, the compiled objects, the
# dependency files and the linked binary. `rm -f` is silent and successful
# when a file is already gone or a list is empty, so no recipe line needs the
# `-` prefix to ignore errors.
clean:
	rm -f $(patsubst doc/%.md,doc/%.aux,$(DOCUMENTS))
	rm -f $(patsubst doc/%.md,doc/%.xdv,$(DOCUMENTS))
	rm -f $(patsubst doc/%.md,doc/%.out,$(DOCUMENTS))
	rm -f $(MAIN_OBJECTS)
	rm -f $(MAIN_DEPENDS)
	rm -f arctic
# Debug helper: `make print-NAME` echoes NAME together with its expanded value.
print-%: ; @echo '$*=$($*)'

View file

@ -3,3 +3,13 @@
Like the JVM, but the "binaries" consist of printable ASCII characters and the
result can be more easily precompiled for native execution.
## TODO
- multiprocessing support (call-through to fork()? codeN for each process?)
- probably want to repurpose breakpoint into fork
- overall philosophy
- easy to target for compilers
- easy to implement with overhead
- possible to implement with no overhead
- can use easy but "slow" or DIY
- e.g. MALL vs static memory and custom allocation

View file

1
doc/arctic-begin.tex Normal file
View file

@ -0,0 +1 @@
\begin{document}

1
doc/arctic-footer.tex Normal file
View file

@ -0,0 +1 @@
\end{document}

12
doc/arctic-leader.tex Normal file
View file

@ -0,0 +1,12 @@
% Shared LaTeX preamble for the generated documentation: the Markdown metadata
% names this file as its "LaTeX Leader" and the Makefile's %.pdf rule lists it
% as a prerequisite.
\documentclass{article}
\usepackage{hyperref}
\usepackage{tabularx}
\usepackage{tabulary}
\usepackage{booktabs}
\usepackage[normalem]{ulem}
\usepackage{glossaries}
\usepackage{soul}
% secnumdepth 0 turns off numbering for sectioning commands.
\setcounter{secnumdepth}{0}
%\renewcommand{\chapternumberline}[1]{}% Gobble chapter numbers in TOC

116
doc/cheatsheet.tex Normal file
View file

@ -0,0 +1,116 @@
% Stand-alone cheat sheet, typeset on landscape letter paper with 1cm margins.
\documentclass{article}
\usepackage[letterpaper, landscape, margin=1cm]{geometry}
\usepackage{tabularx}
\usepackage{tabulary}
\usepackage{booktabs}
\usepackage{tikz}
\usetikzlibrary{shapes.multipart,positioning}
% stack=N: a drawn, centre-anchored "rectangle split" node with N parts.
\tikzstyle{stack}=[rectangle split, rectangle split parts=#1,draw,anchor=center]
% opsummary{short}{long}: opens a tikzpicture whose body must define the nodes
% (before) and (after). When the environment closes, a double-line arrow is
% drawn from (before) to (after), labelled with `short` above and `long` below,
% both in \texttt.
\newenvironment{opsummary}[2]{%
\begin{tikzpicture}
\newcommand{\opsummaryshort}{#1}
\newcommand{\opsummarylong}{#2}
}{%
\draw[->, double distance=1pt] (before) -- node[above] {\texttt{\opsummaryshort}} node[below] {\texttt{\opsummarylong}} (after);
\end{tikzpicture}
}
\begin{document}
% One table with 16 centred columns. The first five cells of the first row
% hold opsummary diagrams (codes 0-4); every other cell is still a bare
% placeholder character.
\begin{tabulary}{\textwidth}{@{}cccccccccccccccc@{}} \toprule
\begin{opsummary}{0}{BNEZ}
\node[stack=2] (before) {%
\nodepart{one}$x$
\nodepart{two}\ldots
};
\node[stack=1,right=of before] (after) {%
\nodepart{one}\ldots
};
\end{opsummary}
&
\begin{opsummary}{1}{LDAB}
\node[stack=2] (before) {%
\nodepart{one}$p$
\nodepart{two}
};
\node[stack=2,right=of before] (after) {%
\nodepart{one}$x$
\nodepart{two}
};
\end{opsummary}
&
\begin{opsummary}{2}{LDAS}
\node[stack=2] (before) {%
\nodepart{one}$p$
\nodepart{two}
};
\node[stack=2,right=of before] (after) {%
\nodepart{one}$x$
\nodepart{two}
};
\end{opsummary}
&
\begin{opsummary}{3}{LDAI}
\node[stack=2] (before) {%
\nodepart{one}$p$
\nodepart{two}
};
\node[stack=2,right=of before] (after) {%
\nodepart{one}$x$
\nodepart{two}
};
\end{opsummary}
&
\begin{opsummary}{4}{LDAW}
\node[stack=2] (before) {%
\nodepart{one}$p$
\nodepart{two}
};
\node[stack=2,right=of before] (after) {%
\nodepart{one}$x$
\nodepart{two}
};
\end{opsummary}
&
% Placeholders for codes 5-F (rest of the first row).
5 &
6 &
7 &
8 &
9 &
A &
B &
C &
D &
E &
F \\
% Placeholders for codes G-V (second row).
G &
H &
I &
J &
K &
L &
M &
N &
O &
P &
Q &
R &
S &
T &
U &
V \\
\end{tabulary}
\end{document}

7
doc/documentation.css Normal file
View file

@ -0,0 +1,7 @@
/* Use a monospace font for the first two cells of each row in every table
   that is a later sibling of #instructions. */
#instructions ~ table td:first-child,
#instructions ~ table td:first-child + td {
  font-family: monospace;
}

View file

@ -1,9 +1,126 @@
# ASCII Rendered, Compiled Target Invocation Code
---
Title: ARCTIC Documentation
Author: Louis A. Burke
Language: en
CSS: doc/documentation.css
LaTeX Leader: arctic-leader.tex
LaTeX Begin: arctic-begin.tex
LaTeX Footer: arctic-footer.tex
LaTeX Header Level: 3
---
# ARCTIC Code
ASCII Rendered, Cross-Target Invocation Character Codes
Like the JVM, but the "binaries" consist of printable ASCII characters and the
result can be more easily precompiled for native execution.
result can be more easily precompiled for native execution. Also far less safety
and verification, feel free to shoot yourself in the foot.
## Opcodes
## Behaviour
The behaviour of ARCTIC code is described by how a theoretical virtual machine
would interpret it. Any compiler or interpreter that produces the same series of
external syscalls with the same values and returns the same result as the
theoretical virtual machine is considered a compliant ARCTIC implementation.
This documentation describes the input as a "file", but it actually may consist
of multiple files or may be processed from standard input. How an ARCTIC
implementation consumes its input is up to each specific implementation, but
should be well documented.
## Structure
ARCTIC code is divided into sections. In each section, each character represents
a different operation.
### Data Types
### The Stack
### Memory
### Sections
```{=todo}
code is LE \ w.r.t
CODE is BE / LDAx
Specifically:
.* is shared file load (" libc.so")
A.* is BE atomic data
a.* is LE atomic data
[bB].* is BSS data (uninitialized, but allocated)
C.* is BE code
c.* is LE code
D.* is BE data
d.* is LE data
E.* is BE externals
e.* is LE externals
[fF].* is embedded file ("file X")
[gG].* is embedded resource ("get X")
[hH].* is embedded resource ("http...")
[iI].* is embedded code ("include X")
[jJ].* is embedded java ("jvm X")
[kK].* is ???
[lL].* is embedded arctic file ("load X")
M.* is BE macros
m.* is LE macros
[nN].* is ???
O.* is BE register-based code ("OPTIMIZED")
o.* is LE register-based code ("optimized")
P.* is BE per-thread data (POOL)
p.* is LE per-thread data (pool)
Q.* is BE quantum code ("QUANTUM")
q.* is LE quantum code ("quantum")
R.* is BE read-only data
r.* is LE read-only data
[sS].* is ???
[tT].* is ???
[uU].* is ???
[vV].* is ???
[wW].* is load module ("with X")
[xX].* is ???
[yY].* is ???
[zZ].* is ???
Need better description of data '' format. See QBE's data for inspiration?
Consider having each op-code mean something different in different sections?
Only consistency would be labels... those could be more universally parsed?
Alternatively remove sections altogether and use only dynamic memory?
```
## File Format
## Loading, Compiling, and Running
## Instructions
This section describes the various instructions that are defined for an ARCTIC
system. When stack manipulation is shown, the following conventions apply.
*Stack direction:* The stack is shown from left to right where the top of the
stack is on the right. For example `1 2 3` shows a stack with 3 on top of 2 on
top of 1.
*Ellipses:* When only the top of the stack is relevant, the rest may be
abbreviated with `…`. For example `… 10` shows a stack with a value of 10 at the
top and unknown data possibly below it.
*Production:* To indicate the change in a stack, an arrow is used. For example
`1 2 3 → 3 1 2` shows a stack containing 1, 2, 3 becoming a stack containing 3,
1, 2. Similarly, if the arrow is doubled, it indicates an immediate value is
also consumed. For example `1 2 ⇒ 3` shows a stack containing 1 and 2 becoming
a stack containing 3 after consuming an immediate value.
*Variables/Values:* Variables and values are presented as space-separated
C expressions. These are interpreted as if declared as follows:
int64_t x, y, z;
double a, b, c;
void *p, *q; // data pointers
void *f; // function pointer
```{=todo}
Notate that immediates can be: …###, …###.###, …$name, or …v (for abcinxyz)
@ -22,22 +139,490 @@ The ops below are probably code exclusive. The remaining " and ' can switch
modes? Maybe just "section" to start a section? Leaving ' as the only impl-def?
```
### Memory
The following table summarizes the memory operations that are described below.
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
1 |LDAB| … p| → | … x | load byte x from p
2 |LDAS| … p| → | … x | load short x from p
3 |LDAI| … p| → | … x | load int x from p
4 |LDAW| … p| → | … x | load word x from p
o |LDOB| … p| ⇒ | … x | load byte at p plus immediate offset in x
H |LDOS| … p| ⇒ | … x | load short at p plus immediate offset in x
G |LDOI| … p| ⇒ | … x | load int at p plus immediate offset in x
Q |LDOW| … p| ⇒ | … x | load word at p plus immediate offset in x
5 |STAB| … p x| → | … | store byte x to p
6 |STAS| … p x| → | … | store short x to p
7 |STAI| … p x| → | … | store int x to p
8 |STAW| … p x| → | … | store word x to p
D†|MCLR| … p x| → | … | memclear x bytes from p
d†|MSET| … p x| ⇒ | … | set x bytes of memory to immediate value at p
K†|MCPY| … p q x| → | … | copy x bytes of memory from p to q
J |MOFF| … p x| → | … q | set q to memory pointer p shifted by x bytes
M†|MALL| … p| ⇒ | … p | (re)allocate/free memory for immediate bytes at p
R†|REAL| … p x| → | … p | (re)allocate/free memory for x bytes at p
9 |CASS| … p x y| → | … z | compare and swap x and y at p, return success
#### LDAx
The LDAx instructions take a pointer on the top of the stack and replace it with
the value of 1, 2, 4, or 8 bytes loaded from its address as two's complement. The
endianness is determined by the endianness of the code section under execution.
- LDAB: `… p → … (int64)*(int8*)p`
- LDAS: `… p → … (int64)*(int16*)p`
- LDAI: `… p → … (int64)*(int32*)p`
- LDAW: `… p → … *(int64*)p`
#### LDOx
The LDOx instructions take an immediate offset following the instruction code
and a pointer on top of the stack and act like their LDAx counterparts, but
operating on the memory address specified by adding the immediate value to the
pointer.
- LDOB(i): `… p ⇒ … (int64)((int8*)p)[i]`
- LDOS(i): `… p ⇒ … (int64)((int16*)p)[i]`
- LDOI(i): `… p ⇒ … (int64)((int32*)p)[i]`
- LDOW(i): `… p ⇒ … ((int64*)p)[i]`
#### STAx
The STAx instructions take a value on top of the stack and a pointer underneath
it and write the least significant 1, 2, 4, or 8 bytes of the value to the
address of the pointer, removing both from the stack.
- STAB: `… p x → … (void)(*(int8*)p=x&0xFF)`
- STAS: `… p x → … (void)(*(int16*)p=x&0xFFFF)`
- STAI: `… p x → … (void)(*(int32*)p=x&0xFFFFFFFF)`
- STAW: `… p x → … (void)(*(int64*)p=x)`
#### MOFF
The MOFF instruction takes an integer on top of the stack and a memory address
underneath it and shifts the memory address by the integer, removing the integer
in the process.
- MOFF: `… p x → … ((int8*)p+x)`
#### MCLR
The MCLR instruction takes an integer value on top of the stack and a pointer
underneath it. It then clears as many bytes of memory in order as indicated by
the integer value starting from the pointer address. Both the value and pointer
are removed from the stack.
- MCLR: `… p x → … (void)memset(p,0,x)`
#### MSET
The MSET instruction takes an immediate byte value following the instruction
code, as well as an integer value on top of the stack and a pointer underneath
it. It then sets as many bytes of memory in order as indicated by the integer
value starting from the pointer address to the immediate value. Both the value
and pointer are removed from the stack.
- MSET(i): `… p x ⇒ … (void)memset(p,i,x)`
#### MCPY
The MCPY instruction takes an integer value on top of the stack and a pair of
pointers underneath it. It then copies as many bytes of memory in order as
indicated by the integer value starting from the bottom pointer into memory
starting from the top pointer. All three arguments are removed from the stack.
- MCPY: `… p q x → … (void)memmove(q, p, x)`
#### MALL
The MALL instruction takes an immediate offset following the instruction code
and a pointer on top of the stack and allocates or reallocates memory of that
many bytes, replacing the pointer with the new pointer on the stack.
If the size is zero, this frees the memory and pushes a null pointer.
- MALL(i): `… p ⇒ … realloc(p, i)`
#### REAL
The REAL instruction takes an offset value on top of the stack and a pointer
underneath it and allocates or reallocates memory of that many bytes, replacing
the pointer with the new pointer on the stack.
If the size is zero, this frees the memory and pushes a null pointer.
- REAL: `… p x → … realloc(p, x)`
#### CASS
The CASS instruction takes a new value on top of the stack, an expected old
value underneath it, and a pointer underneath that. It atomically compares the
8 bytes of memory referenced by the pointer with the expected old value; if they
match, the memory content is updated to the new value and all three arguments
are replaced with the number 1 as a 64-bit integer; if they do not match, no
memory is updated and all three arguments are replaced with the number 0.
- CASS: `… p x y → … atomic_compare_exchange_strong(p,&x,y)`
### Math
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
\+|IADD| … x y| → | … z | set z to the sum of x and y
\-|ISUB| … x y| → | … z | set z to the difference between x and y (x - y)
\*|IMUL| … x y| → | … z | set z to the product of x and y
/ |IDIV| … x y| → | … z | set z to the quotient of x by y
% |IMOD| … x y| → | … z | set z to the modulo of x by y
\\|IREM| … x y| → | … z | set z to the remainder of x by y
\_|INEG| … x| → | … x | negate x
\||IABS| … x| → | … x | take the absolute value of x
s |FADD| … a b| → | … c | set c to the sum of a and b
m |FSUB| … a b| → | … c | set c to the difference between a and b (a - b)
p |FMUL| … a b| → | … c | set c to the product of a and b
q |FDIV| … a b| → | … c | set c to the quotient of a by b
f |FMOD| … a b| → | … c | set c to the modulo of a by b
r |FREM| … a b| → | … c | set c to the remainder of a by b
j |FNEG| … a| → | … a | negate a
v |FABS| … a| → | … a | take the absolute value of a
V |BIOR| … x y| → | … z | bitwise OR x and y and store in z
\&|BAND| … x y| → | … z | set z to the bitwise and of x and y
^ |BXOR| … x y| → | … z | set z to the bitwise xor of x and y
l |BITC| … x y| → | … z | bit clear y from x to z (z = x and not y)
! |BNOT| … x| → | … y | set y to the bitwise not of x
u |USHR| … x| ⇒ | … x | logical shift x by immediate bits right
[ |ROTR| … x| ⇒ | … x | rotate x by immediate bits
] |BSHR| … x| ⇒ | … x | arithmetic right shift x by immediate bits
? |CMPI| … x y| → | … z | compare x to y and set z such that x o y is z o 0
~ |CMPF| … a b| ⇒ | … c | compare a to b and set c such that a o b is c o 0 within immediate ULPs
U |CMPU| … x y| → | … z | compare x to y unsigned and set z such that x o y is z o 0
F |BMIS| … x| ⇒ | … x | bit manipulation instructions
#### xADD/xSUB/xMUL/xDIV/xMOD/xREM
The xADD/xSUB/xMUL/xDIV/xMOD/xREM instructions take two values on top of the
stack (either two integers or two floating point numbers) and perform the
specified mathematical operation, replacing the values with the result of the
operation.
The difference between MOD and REM is that MOD is floored while REM is
truncated.
- IADD: `… x y → (x+y)`
- FADD: `… a b → (a+b)`
- ISUB: `… x y → (x-y)`
- FSUB: `… a b → (a-b)`
- IMUL: `… x y → (x*y)`
- FMUL: `… a b → (a*b)`
- IDIV: `… x y → (x/y)`
- FDIV: `… a b → (a/b)`
- IMOD: `… x y → (y>0?abs(x%y):-abs(x%y))`
- FMOD: `… a b → (b>0?fabs(fmod(a,b)):-fabs(fmod(a,b)))`
- IREM: `… x y → x%y`
- FREM: `… a b → fmod(a,b)`
#### xNEG/xABS
The xNEG/xABS instructions take a value on top of the stack (either an integer
or a floating point number) and replace it with either its negated value or its
absolute value.
- INEG: `… x → -x`
- FNEG: `… a → -a`
- IABS: `… x → abs(x)`
- FABS: `… a → fabs(a)`
#### BIOR/BAND/BXOR/BITC
The BIOR/BAND/BXOR/BITC instructions take two values on top of the stack and
perform the specified bitwise operation, replacing the values with the result of
the operation.
BITC is a single instruction that performs A AND NOT B between its arguments.
- BIOR: `… x y → x|y`
- BAND: `… x y → x&y`
- BXOR: `… x y → x^y`
- BITC: `… x y → x&~y`
#### USHR/BSHR/ROTR
The USHR/BSHR instructions take a value on top of the stack and an
immediate offset and shift the value on top of the stack that number of bits in
the given direction.
USHR and BSHR differ in that BSHR performs an arithmetic shift, duplicating the
highest order bit instead of shifting in a 0.
ROTR performs a rightward rotation, however with a negative shift amount, it
performs a leftward rotation.
- USHR(i): `… x ⇒ ((uint64_t)x)>>i`
- BSHR(i): `… x ⇒ x>>i`
- ROTR(i): `… x ⇒ ((uint64_t)x>>i)|((uint64_t)x<<(64-i))`
#### CMPx
The CMPx instructions perform comparisons between two values on top of the stack
and replace them with either 1, 0, or -1 if the top value is less than, equal
to, or greater than the second value respectively. In the case of CMPF,
a floating point precision is specified by an immediate value, with equality
spanning all values within the given precision of each other.
- CMPI: `… x y → ((x-y)>0)-((x-y)<0)`
- CMPF(i): `… a b ⇒ fabs(a-b)<i?0:((a-b)>0)-((a-b)<0)`
- CMPU: `… x y → ((uint64_t)x>(uint64_t)y)-((uint64_t)x<(uint64_t)y)`
#### BMIS
The BMIS instruction bundles many unary bitwise operators into a single
instruction, indexed by an immediate integer based on the following table:
Immediate | Operation
-----------|--------------------------------------------------------------------
0 | ???
### Type Conversions
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
E |FTOI| … a| ⇒ | … x | round a to integer and store in x
O |ITOC| … x| → | … x | truncate x to 8 bits, then sign extend
S |ITOS| … x| → | … x | truncate x to 16 bits, then sign extend
W |ITOW| … x| → | … x | truncate x to 32 bits, then sign extend
. |ITOF| … x| → | … a | convert x to a floating point number
#### FTOI
The FTOI instruction takes a floating point value on top of the stack and
replaces it with an integer by rounding. The direction of rounding is determined
by the immediate integer value:
Value | Rounding Mode
------:|:-----------------------------------------------------------------------
1 | Round towards infinity. "Round up"
-1 | Round towards negative infinity. "Round down"
0 | Round towards 0. "Truncate"
others| Round to even. "Unbiased"
- FTOI(1): `… a ⇒ fesetround(FE_UPWARD),(int64_t)llrint(a)`
- FTOI(-1): `… a ⇒ fesetround(FE_DOWNWARD),(int64_t)llrint(a)`
- FTOI(0): `… a ⇒ fesetround(FE_TOWARDZERO),(int64_t)llrint(a)`
- FTOI(i): `… a ⇒ fesetround(FE_TONEAREST),(int64_t)llrint(a)`
#### ITOC/ITOS/ITOW
The ITOC/ITOS/ITOW instructions take a 64-bit integer on top of the stack and
replace it with a truncated and sign extended smaller integer.
- ITOC: `… x → (int64_t)*(int8_t*)&x`
- ITOS: `… x → (int64_t)*(int16_t*)&x`
- ITOW: `… x → (int64_t)*(int32_t*)&x`
#### ITOF
The ITOF instruction takes a 64-bit integer on top of the stack and replaces it
with the closest 64-bit floating point approximation.
- ITOF: `… x → (double)x`
### Stack Management
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
w |SDUP| … x| → | … x x | stack duplicate
k |SPOP| … x| → | … | stack pop
t |SWAP| … x y| → | … y x | stack swap
h |OVER| … x y| → | … x y x | stack over
e |SROT| … x y z| → | … y z x | rotate stack
P |PICK| … x ‥ z| ⇒ | … x ‥ z x | pick stack element at immediate place
L |ROLL| … x y ‥ z| ⇒ | … y ‥ z x | roll stack by immediate places
T |PUSH| … x ‥ y z| ⇒ | … z x ‥ y | push top of stack down by immediate places (opposite of ROLL)
( |DPTH| …| → | … x | set x to depth of stack (before x)
) |PACK| … p x| → | … p | pack x elements of stack (before p) into array p
#### SDUP
The SDUP instruction takes a value on top of the stack and pushes a copy of it
to the top of the stack.
- SDUP: `… x → x x`
#### SPOP
The SPOP instruction removes the value on top of the stack.
- SPOP: `… x → `
#### SWAP/OVER/SROT
The SWAP/OVER/SROT instructions manipulate the values near the top of the stack.
- SWAP: `… x y → … y x`
- OVER: `… x y → … x y x`
- SROT: `… x y z → … y z x`
#### PICK
The PICK instruction acts like an OVER command with an arbitrary depth, copying an
element indicated by the immediate value to the top of the stack. OVER can be
defined in terms of PICK as `P1` while SDUP is equivalent to `P0`.
- PICK(i): `… x …i… y ⇒ … x …i… y x`
#### ROLL/PUSH
The ROLL and PUSH instructions act like SROT with an arbitrary depth, allowing
a value from any depth in the stack to be rotated to the top or pushed back down
to depth. SROT can be defined in terms of ROLL as `L1` while SWAP is equivalent
to `L0` or `T0`.
- ROLL(i): `… x y …i… z ⇒ … y …i… z x`
- PUSH(i): `… x …i… y z ⇒ … z x …i… y`
#### DPTH/PACK
The DPTH instruction pushes the total depth of the stack on top of the stack.
The PACK instruction takes an integer on top of a pointer and saves the top
elements of the stack beneath the pointer to the memory pointed to by it.
These can be combined to store the entire stack in a new array via: `(w$0 Rt)`
- DPTH: `…x… → …x… x`
- PACK: `… p x → p`
### Control Flow
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
\=|BEQZ| … x| ⇒ | … | branch to immediate if x is zero
0 |BNEZ| … x| ⇒ | … | branch to immediate if x not zero
\<|BLTZ| … x| ⇒ | … | branch to immediate if x less than zero
\>|BGTZ| … x| ⇒ | … | branch to immediate if x is greater than zero
{ |BLEZ| … x| ⇒ | … | branch to immediate if x is less than or equal to zero
} |BGEZ| … x| ⇒ | … | branch to immediate if x is greater or equal to zero
g |FINV| … f| → | f … | invoke f, saving return address on stack
@ |CALL| …| ⇒ | f … | call immediate name
, |JUMP| …| ⇒ | … | jump to immediate without pushing return address
; |RETN| … f| → | … | return from subroutine (jump but for stack)
\:|LABL| … | ↔ | … | label code location
#### BEQZ/BNEZ/BLTZ/BGTZ/BLEZ/BGEZ
The BEQZ/BNEZ/BLTZ/BGTZ/BLEZ/BGEZ instructions are the basic branching
instructions. They take an immediate offset and, if their condition is met, jump
that many beats (lines) forward or backward.
- BEQZ(i): `… x ⇒ …`
- BNEZ(i): `… x ⇒ …`
- BLTZ(i): `… x ⇒ …`
- BGTZ(i): `… x ⇒ …`
- BLEZ(i): `… x ⇒ …`
- BGEZ(i): `… x ⇒ …`
#### FINV/CALL
The FINV/CALL instructions invoke functions by jumping to their first
instruction after pushing the address of the next instruction on the stack.
- FINV: `… f → g …`
- CALL(f): `… → g …`
#### JUMP/RETN
The JUMP/RETN instructions are the complement to the FINV/CALL instructions.
They jump to the first instruction of the given function, but do not push
a return address on the stack.
- JUMP(f): `… ⇒ …`
- RETN: `… f ⇒ …`
#### LABL
The LABL instruction takes an immediate name and marks the next current beat in
memory as having that name for future or past references.
- LABL: `… ↔ …`
### Variables
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
A |PUTA| … x| → | … | store x in rA
B |PUTB| … x| → | … | store x in rB
C |PUTC| … x| → | … | store x in rC
I |PUTI| … x| → | … | store x in rI
N |PUTN| … x| → | … | store x in rN
X |PUTX| … x| → | … | store x in rX
Y |PUTY| … x| → | … | store x in rY
Z |PUTZ| … x| → | … | store x in rZ
a |GETA| …| → | … x | load x from rA
b |GETB| …| → | … x | load x from rB
c |GETC| …| → | … x | load x from rC
i |GETI| …| → | … x | load x from rI
n |GETN| …| → | … x | load x from rN
x |GETX| …| → | … x | load x from rX
y |GETY| …| → | … x | load x from rY
z |GETZ| …| → | … x | load x from rZ
#### PUTA/PUTB/PUTC/PUTI/PUTN/PUTX/PUTY/PUTZ
The PUTA/PUTB/PUTC/PUTI/PUTN/PUTX/PUTY/PUTZ instructions take a value on top of
the stack and store it in a register for later recall.
- PUTA: `… x → …`
- PUTB: `… x → …`
- PUTC: `… x → …`
- PUTI: `… x → …`
- PUTN: `… x → …`
- PUTX: `… x → …`
- PUTY: `… x → …`
- PUTZ: `… x → …`
#### GETA/GETB/GETC/GETI/GETN/GETX/GETY/GETZ
The GETA/GETB/GETC/GETI/GETN/GETX/GETY/GETZ instructions push the previously
saved value in a register onto the stack.
- GETA: `… → … x`
- GETB: `… → … x`
- GETC: `… → … x`
- GETI: `… → … x`
- GETN: `… → … x`
- GETX: `… → … x`
- GETY: `… → … x`
- GETZ: `… → … x`
### Miscellaneous
⋄ |Code| … | Stack | … | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
$ |VALU| … | ⇒ | … \* | load constant value
' |DATA| | | | Embed data
" |SECT| | | | Change section
\`|BIFC| … ? | ↔ | … ? | call builtin with given name/value
␣ |NOOP| … | ↔ | … | do nothing, maybe end identifier definition
¶ |BEAT| … | ↔ | … | mark a beat for relative branching
### Summary
| -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7
. | -0 | -1 | -2 | -3 | -4 | -5 | -6 | -7
---:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:
000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI`
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE`
010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN`
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T PUSH`|`U CMPU`|`V B_OR`
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F BMIS`
010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MALL`|`N PUTN`
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R REAL`|`S ITOS`|`T PUSH`|`U CMPU`|`V BIOR`
100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET`
101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC`
110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP`
111-|`u USHR`|`v FABS`|`w SDUP`|`x GETX`|`y GETY`|`z GETZ`|`_ INEG`|`. ITOF`
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU`
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`\| IABS`|`$ VALU`
201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RETN`
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT`
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ ROTR`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BIFC`|`␣ NOOP`|`# COMM`|`¶ BEAT`
[Operations by code][ops-by-code]
In this table x, y, and z are integers; a, b, and c are floating point numbers;
@ -45,101 +630,101 @@ p and q are memory pointers; and f is a function pointer. Additionally,
→ indicates a basic operation, while ⇒ indicates an operation using an immediate
value, and ↔ indicates a meta operation.
Code | Stack | Description
-----:|:-------------------:|:--------------------------------------------------
0 BNEZ| … x ⇒ … | branch to immediate if x not zero
1 LDAB| … p → … x | load byte x from p
2 LDAS| … p → … x | load short x from p
3 LDAI| … p → … x | load int x from p
4 LDAW| … p → … x | load word x from p
5 STAB| … p x → … | store byte x to p
6 STAS| … p x → … | store short x to p
7 STAI| … p x → … | store int x to p
8 STAW| … p x → … | store word x to p
9 CASS| … p x y → … z | compare and swap x and y at p, return success
A PUTA| … x → … | store x in rA
B PUTB| … x → … | store x in rB
C PUTC| … x → … | store x in rC
D MCLR| … p x → … | memclear x bytes from p
E FTOI| … a → … x | round a to integer and store in x
F FREE| … p → … | free memory at p
G LDOI| … p ⇒ … x | load int at p plus immediate offset in x
H LDOS| … p ⇒ … x | load short at p plus immediate offset in x
I PUTI| … x → … | store x in rI
J MOFF| … p x → … q | set q to memory pointer p shifted by x bytes
K MCPY| … p q x → … | copy x bytes of memory from p to q
L ROLL| … x y ‥ z ⇒ … y ‥ z x | roll stack by immediate places
M MNEW| … ⇒ … p | allocate memory for immediate bytes at p
N PUTN| … x → … | store x in rN
O ITOC| … x → … x | truncate x to 8 bits, then sign extend
P PICK| … x ‥ z ⇒ … x ‥ z x | pick stack element at immediate place
Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x
R MALL| … x → … p | allocate memory for x bytes at p
S ITOS| … x → … x | truncate x to 16 bits, then sign extend
T PUSH| … x . y z ⇒ … y x ‥ z | push top of stack down by immediate places (opposite of ROLL)
U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0
V B_OR| … x y → … z | bitwise OR x and y and store in z
W ITOW| … x → … x | truncate x to 32 bits, then sign extend
X PUTX| … x → … | store x in rX
Y PUTY| … x → … | store x in rY
Z PUTZ| … x → … | store x in rZ
a GETA| … → … x | load x from rA
b GETB| … → … x | load x from rB
c GETC| … → … x | load x from rC
d MSET| … p x ⇒ … | set x bytes of memory to immediate value at p
e SROT| … x y z → … y z x | rotate stack
f FMOD| … a b → … c | set c to the modulo of a by b
g FINV| … f → f … | invoke f, saving return address on stack
h OVER| … x y → … x y x | stack over
i GETI| … → … x | load x from rI
j FNEG| … a → … a | negate a
k SPOP| … x → … | stack pop
l BITC| … x y → … z | bit clear y from x to z (z = x and not y)
m FSUB| … a b → … c | set c to the difference between a and b (a - b)
n GETN| … → … x | load x from rN
o LDOB| … p ⇒ … x | load byte at p plus immediate offset in x
p FMUL| … a b → … c | set c to the product of a and b
q FDIV| … a b → … c | set c to the quotient of a by b
r FREM| … a b → … c | set c to the remainder of a by b
s FADD| … a b → … c | set c to the sum of a and b
t SWAP| … x y → … y x | stack swap
u USHR| … x ⇒ … x | logical shift x by immediate bits right
v FABS| … a → … a | take the absolute value of a
w SDUP| … x → … x x | stack duplicate
x GETX| … → … x | load x from rX
y GETY| … → … x | load x from rY
z GETZ| … → … x | load x from rZ
_ INEG| … x → … x | negate x
. ITOF| … x → … a | convert x to a floating point number
\+ IADD|… x y → … z | set z to the sum of x and y
\- ISUB|… x y → … z | set z to the difference between x and y (x - y)
\* IMUL|… x y → … z | set z to the product of x and y
/ IDIV| … x y → … z | set z to the quotient of x by y
% IMOD| … x y → … z | set z to the modulo of x by y
\ IREM| … x y → … z | set z to the remainder of x by y
\| IABS|… x → … x | take the absolute value of x
$ VALU| … ↔ … * | load constant value
@ CALL| … ↔ … | call immediate name
\< BLTZ|… x ⇒ … | branch to immediate if x is less than zero
{ BLEZ| … x ⇒ … | branch to immediate if x is less than or equal to zero
\= BEQZ|… x ⇒ … | branch to immediate if x is zero
} BGEZ| … x ⇒ … | branch to immediate if x is greater or equal to zero
\> BGTZ|… x ⇒ … | branch to immediate if x is greater than zero
, JUMP| … ⇒ … | jump to immediate without pushing return address
; RETN| … f ⇒ … | return from subroutine (jump but for stack)
& BAND| … x y → … z | set z to the bitwise and of x and y
^ BXOR| … x y → … z | set z to the bitwise xor of x and y
! BNOT| … x → y | set y to the bitwise not of x
[ BSHL| … x ⇒ … x | left shift x by immediate bits
] BSHR| … x ⇒ … x | arithmetic right shift x by immediate bits
? CMPI| … x y → … z | compare x to y and set z such that x o y is z o 0
~ CMPF| … a b ⇒ … c | compare a to b and set c such that a o b is c o 0 within an immediate error
\: LABL|… f ↔ … | label code location
( DPTH| … → … x | set x to depth of stack (before x)
) PACK| … p x → … | pack x elements of stack (before p) into array p
' DATA| | Embed data
" SECT| | Change section
\` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging
␣ NOOP| … ↔ … | do nothing, maybe end identifier definition
¶ BEAT| … ↔ … | mark a beat for relative branching
|Code| … | Stack | | Description
--|---:|----------:|:-:|:----------------|:-------------------------------------
0 |BNEZ| … x|| | branch to immediate if x not zero
1 |LDAB| … p|| … x | load byte x from p
2 |LDAS| … p|| … x | load short x from p
3 |LDAI| … p|| … x | load int x from p
4 |LDAW| … p|| … x | load word x from p
5 |STAB| … p x|| | store byte x to p
6 |STAS| … p x|| | store short x to p
7 |STAI| … p x|| | store int x to p
8 |STAW| … p x|| | store word x to p
9 |CASS| … p x y| | … z | compare and swap x and y at p, return success
A |PUTA| … x|| | store x in rA
B |PUTB| … x|| | store x in rB
C |PUTC| … x|| | store x in rC
D |MCLR| … p x|| | memclear x bytes from p
E |FTOI| … a| ⇒ | … x | round a to integer and store in x
F |BMIS| … x| ⇒ | … x | bit manipulation instructions
G |LDOI| … p|| … x | load int at p plus immediate offset in x
H |LDOS| … p|| … x | load short at p plus immediate offset in x
I |PUTI| … x|| | store x in rI
J |MOFF| … p x|| … q | set q to memory pointer p shifted by x bytes
K |MCPY| … p q x| | | copy x bytes of memory from p to q
L |ROLL| … x y ‥ z| | … y ‥ z x | roll stack by immediate places
M |MALL| … p| ⇒ | … p | (re)allocate/free memory for immediate bytes at p
N |PUTN| … x|| | store x in rN
O |ITOC| … x|| … x | truncate x to 8 bits, then sign extend
P |PICK| … x ‥ z| | … x ‥ z x | pick stack element at immediate place
Q |LDOW| … p|| … x | load word at p plus immediate offset in x
R |REAL| … p x| → | … p | (re)allocate/free memory for x bytes at p
S |ITOS| … x|| … x | truncate x to 16 bits, then sign extend
T |PUSH| … x ‥ y z| ⇒ | … y x ‥ z | push top of stack down by immediate places (opposite of ROLL)
U |CMPU| … x y|| … z | compare x to y unsigned and set z such that x o y is z o 0
V |BIOR| … x y| → | … z | bitwise OR x and y and store in z
W |ITOW| … x|| … x | truncate x to 32 bits, then sign extend
X |PUTX| … x|| | store x in rX
Y |PUTY| … x|| | store x in rY
Z |PUTZ| … x|| | store x in rZ
a |GETA| | | … x | load x from rA
b |GETB| | | … x | load x from rB
c |GETC| | | … x | load x from rC
d |MSET| … p x|| | set x bytes of memory to immediate value at p
e |SROT| … x y z| | … y z x | rotate stack
f |FMOD| … a b|| … c | set c to the modulo of a by b
g |FINV| … f|| f … | invoke f, saving return address on stack
h |OVER| … x y|| … x y x | stack over
i |GETI| | | … x | load x from rI
j |FNEG| … a|| … a | negate a
k |SPOP| … x|| | stack pop
l |BITC| … x y|| … z | bit clear y from x to z (z = x and not y)
m |FSUB| … a b|| … c | set c to the difference between a and b (a - b)
n |GETN| | | … x | load x from rN
o |LDOB| … p|| … x | load byte at p plus immediate offset in x
p |FMUL| … a b|| … c | set c to the product of a and b
q |FDIV| … a b|| … c | set c to the quotient of a by b
r |FREM| … a b|| … c | set c to the remainder of a by b
s |FADD| … a b|| … c | set c to the sum of a and b
t |SWAP| … x y|| … y x | stack swap
u |USHR| … x|| … x | logical shift x by immediate bits right
v |FABS| … a|| … a | take the absolute value of a
w |SDUP| … x|| … x x | stack duplicate
x |GETX| | | … x | load x from rX
y |GETY| | | … x | load x from rY
z |GETZ| | | … x | load x from rZ
\_|INEG| … x| → | … x | negate x
. |ITOF| … x|| … a | convert x to a floating point number
\+|IADD| … x y| → | … z | set z to the sum of x and y
\-|ISUB| … x y| → | … z | set z to the difference between x and y (x - y)
\*|IMUL| … x y| → | … z | set z to the product of x and y
/ |IDIV| … x y|| … z | set z to the quotient of x by y
% |IMOD| … x y|| … z | set z to the modulo of x by y
\\|IREM| … x y| → | … z | set z to the remainder of x by y
\||IABS| … x| → | … x | take the absolute value of x
$ |VALU| …| ⇒ | … \* | load constant value
@ |CALL| …| → | f … | call immediate name
\<|BLTZ| … x| ⇒ | … | branch to immediate if x less than zero
{ |BLEZ| … x|| | branch to immediate if x is less than or equal to zero
\=|BEQZ| … x| ⇒ | … | branch to immediate if x is zero
} |BGEZ| … x|| | branch to immediate if x is greater or equal to zero
\>|BGTZ| … x| ⇒ | … | branch to immediate if x is greater than zero
, |JUMP| | | … | jump to immediate without pushing return address
; |RETN| … f|| | return from subroutine (jump but for stack)
\&|BAND| … x y| → | … z | set z to the bitwise and of x and y
^ |BXOR| … x y|| … z | set z to the bitwise xor of x and y
! |BNOT| … x|| y | set y to the bitwise not of x
[ |ROTR| … x| ⇒ | … x | rotate x by immediate bits
] |BSHR| … x|| … x | arithmetic right shift x by immediate bits
? |CMPI| … x y|| … z | compare x to y and set z such that x o y is z o 0
~ |CMPF| … a b|| … c | compare a to b and set c such that a o b is c o 0 within an immediate error
\:|LABL| … | ↔ | … | label code location
( |DPTH| | | … x | set x to depth of stack (before x)
) |PACK| … p x|| p | pack x elements of stack (before p) into array p
' |DATA| | | | Embed data
" |SECT| | | | Change section
\`|BIFC| … ? | ↔ | … ? | call builtin with given name/value
|NOOP| | | … | do nothing, maybe end identifier definition
|BEAT| | | … | mark a beat for relative branching
[Operations in order][ops-in-order]

26
doc/overview.md Normal file
View file

@ -0,0 +1,26 @@
---
Title: ARCTIC Documentation
Author: Louis A. Burke
Language: en
CSS: documentation.css
LaTeX Leader: arctic-leader.tex
LaTeX Begin: arctic-begin.tex
LaTeX Footer: arctic-footer.tex
LaTeX Header Level: 3
---
# ARCTIC Code
ASCII Rendered, Cross-Target Invocation Character Codes
An executable "binary" format consisting of only ASCII characters.
## Goals
The priorities of this project are:
- Reasonable performance
- Easy to compile to
- Reasonable to hand-write
- Reasonable to debug
- Possible to interface to native environments

50
misc/utils.m4 Normal file
View file

@ -0,0 +1,50 @@
divert(`-1')dnl
#changequote(`[', `]')dnl
# TICK
# expands to a single apostrophe (the default m4 close-quote character).
# While the definition is read, the quote delimiters are temporarily [ and ]
# so that the body can be stored literally; the stored body switches to
# [ and ] again, emits the apostrophe, and restores the default delimiters.
define(`TICK', changequote([,])[changequote([,])'changequote(`,')]changequote(`,'))
# forloop(var, from, to, stmt) - improved version:
# works even if VAR is not a strict macro name
# performs sanity check that FROM does not exceed TO (otherwise expands to
# nothing); the range is inclusive of both FROM and TO
# allows complex numerical expressions in TO and FROM
define(`forloop', `ifelse(eval(`($2) <= ($3)'), `1',
`pushdef(`$1')_$0(`$1', eval(`$2'),
eval(`$3'), `$4')popdef(`$1')')')
# _forloop(var, current, last, stmt)
# helper: defines VAR as CURRENT, expands STMT, then recurses with CURRENT + 1
# until CURRENT equals LAST
define(`_forloop',
`define(`$1', `$2')$4`'ifelse(`$2', `$3', `',
`$0(`$1', incr(`$2'), `$3', `$4')')')
# foreachq(x, `item_1, item_2, ..., item_n', stmt)
# quoted list, version based on forloop
# defines x as each item in turn and expands stmt; an empty list expands to
# nothing
define(`foreachq',
`ifelse(`$2', `', `', `_$0(`$1', `$3', $2)')')
# _foreachq(x, stmt, item_1, ..., item_n)
# helper: runs forloop over the argument positions 3 to $# to build a temporary
# definition of x consisting of one _foreachq_ expansion per item followed by a
# popdef of x, then invokes that definition through indir with all arguments
define(`_foreachq',
`pushdef(`$1', forloop(`$1', `3', `$#',
`$0_(`1', `2', indir(`$1'))')`popdef(
`$1')')indir(`$1', $@)')
# _foreachq_(a, b, c)
# helper: produces the text that defines argument number a as argument number c
# and then expands argument number b
define(`_foreachq_',
``define(`$$1', `$$3')$$2`''')
# foreach(x, (item_1, item_2, ..., item_n), stmt)
# parenthesized list, improved version
# NOTE(review): the body calls dquote and dquote_elt, which are not defined in
# the visible part of this file; confirm they are provided before foreach is
# used
define(`foreach', `pushdef(`$1')_$0(`$1',
(dquote(dquote_elt$2)), `$3')popdef(`$1')')
# _arg1(item_1, ...)
# expands to its first argument
define(`_arg1', `$1')
# _foreach(x, (item_1, ...), stmt)
# helper: stops on the empty quoted list, otherwise defines x as the first
# item, expands stmt and recurses on the remaining items
define(`_foreach', `ifelse(`$2', `(`')', `',
`define(`$1', _arg1$2)$3`'$0(`$1', (dquote(shift$2)), `$3')')')
# hextox(x)
# hexadecimal number to series of X(0xZZ)X(0xZZ)...
# every consecutive pair of characters becomes one X(0x..) call, in the order
# the pairs appear; a trailing unpaired character is passed through unchanged
define(`hextox', `patsubst(`$1', `..', `X(0x\&)')')
# hashtag(x)
# expands to #x
# comments are switched off while the text is emitted so that the # does not
# start an m4 comment, then the default # comment delimiter is restored
define(`hashtag', `changecom()dnl
#$1`'dnl
changecom(`#')')
# rtrimn(x, n)
# expands to all but the last n characters of x
# x is quoted inside the body so that it is measured and cut as literal text,
# even when it contains a comma or something that looks like a macro name
define(`rtrimn', `substr(`$1', 0, eval(len(`$1')-$2))')
divert(`0')dnl

11
samples/asm.arx Normal file
View file

@ -0,0 +1,11 @@
#!/usr/bin/env arctic
# This is a basic ARCTIC assembler, written in ARCTIC.
@main
"data" # read-only memory
:opcodes '"BNEZLDABLDASLDAILDAWSTABSTASSTAISTAWCASSPUTAPUTBPUTCMCLRFTOIBMISLDOILDOSPUTIMOFFMCPYROLLMALLPUTNITOCPICKLDOWREALITOSPUSHCMPUBIORITOWPUTXPUTYPUTZGETAGETBGETCMSETSROTFMODFINVOVERGETIFNEGSPOPBITCFSUBGETNLDOBFMULFDIVFREMFADDSWAPUSHRFABSSDUPGETXGETYGETZINEGITOFIADDISUBIMULIDIVIMODIREMIABSVALUCALLBLTZBLEZBEQZBGEZBGTZJUMPRETNBANDBXORBNOTROTRBSHRCMPICMPFLABLDPTHPACKDATASECTBIFCNOOPBEAT"'
"state" # initialized read-write memory

9
samples/hello.ctc Normal file
View file

@ -0,0 +1,9 @@
#!/usr/bin/env arctic
@main
"data"
:message '"Hello, World!\x0A"'
"code"
:main $15 $message $1 `1 $0 ;

View file

@ -1,29 +1,41 @@
#!/usr/bin/env arctic
# TODO: consider .ctc format instead?
@main
"data" # read-only memory
:value '10' # 1 int
:array '[1, 2, 3]' # 3 int array
:values 'i10w10s10b10f10' # 1 each of int/word/short/byte/double in LE order
:hexval 'I10W10S10B10F10' # 1 each of int/word/short/byte/double in BE order
:bytes 'xffffffx' # 3 hexadecimal bytes
:string '"hello"' # 5 utf-8 encoded bytes
:array 'i1i2i3' # 3 int array
"state" # initialized read-write memory
:variable '10' # 1 int
:buffer '[1, 2, 3]' # 3 int array
:variable 'i10' # 1 int
:buffer 'i1i2i3' # 3 int array
"memory" # uninitialized read-write memory
:pool '{10}' # 10 byte pool
"constants" # ephemeral data
:const '10' # 1 universal integer
:const 'i10' # 1 universal integer
"macros" # ephemeral code
:foo
:foo '[
# body of foo here
]'
"code" # loaded code
:bar
# body of bar here
:main
:_baz # private label, not "exported" to other "code" sections
# body of baz here
"code" # more loaded code, but in here _baz not yet defined
# in general, all labels beginning with _ are section-local
"extern libfoo"
:foobar # loaded from either libfoo.a or libfoo.so

View file

@ -0,0 +1,5 @@
" Description: ARCTIC ftdetect file
" Language: ARCTIC (2024)
" Maintainer: Louis Burke
" Both extensions used by the sample programs (.arx and .ctc) are detected.
autocmd BufRead,BufNewFile *.arx,*.ctc set filetype=arctic

View file

@ -0,0 +1,33 @@
" Description: ARCTIC syntax file
" Language: ARCTIC (2024)
" Maintainer: Louis Burke
if exists("b:current_syntax") || version < 700
  finish
endif
let b:current_syntax = "arctic"

" Comments run from # to the end of the line.
syntax match arcticComment "\v#.*$"
highlight link arcticComment Comment

" Embedded data is delimited by apostrophes, section names by double quotes.
syntax region arcticData start=/'/ end=/'/
highlight link arcticData Character
syntax region arcticSection start=/"/ end=/"/
highlight link arcticSection PreProc

" Prefix character followed by everything up to the next space.
syntax match arcticLabel "\v:[^ ]*"
highlight link arcticLabel Label
syntax match arcticCall "\v\@[^ ]*"
highlight link arcticCall Function
syntax match arcticConstant "\v\$[^ ]*"
highlight link arcticConstant Constant

" Remaining opcodes that carry an immediate, together with the immediate text.
" $ and @ are deliberately not listed here: when several match items start at
" the same position Vim uses the one defined last, so listing them would hide
" arcticConstant and arcticCall behind this group.
syntax match arcticImmediate "\v[][0EFGHLMPQTdou<{=}>,`][^ ]*"
highlight link arcticImmediate Statement

" Single-character opcodes without an immediate.
syntax match arcticOperator "\v[-123456789ABCDIJKNORSUVWXYZabcefghijklmnpqrstvwxyz_.+*/%\|;&^!?()]"
highlight link arcticOperator Operator

68
src/arch/x86_64.h.m4 Normal file
View file

@ -0,0 +1,68 @@
#ifndef X86_64_H
#define X86_64_H
#include "../utils.h"
/*******************************************************************************
include(`misc/utils.m4')dnl
divert(`-1')dnl
# registers
# the register names for which per-register macros are generated below
define(`registers', `RAX, RCX, RDX, RBX, R8, R9, R10, R11, R12, R13, R14, R15')
# x86_encoding_of(asm)
# pipes the assembly text, wrapped in apostrophes for the shell, through
# ./tools/x86_64dump (a path relative to the directory m4 is run from) and
# removes the newlines from whatever the tool prints
define(`x86_encoding_of', `translit(esyscmd(`echo 'TICK`$1'TICK` | ./tools/x86_64dump'),`
')')
# assemble(asm)
# the encoding of asm as a series of X(0x..) bytes, followed by Y("asm")
# carrying the source text
define(`assemble', `hextox(x86_encoding_of($1)) Y("$1")')
divert(`0')dnl
Types:
R: registers
V: BYTE(X), WORD(X)
Q: QWORD(X)
To use these macros, either define X(x) as (x), to make lists of bytes, or
define Y(s) as whatever you want to do with a string literal. Keep the other
defined as an empty expansion.
*******************************************************************************/
#define PUSH(A_RV) PUSH_ ## A_RV
#define POP(A_R) POP_ ## A_R
#define MOV(DST_R,SRC_RV) MOV_ ## DST_R ## _ ## SRC_RV
#define ADD(DST_R,SRC_RV) ADD_ ## DST_R ## _ ## SRC_RV
#define SUB(DST_R,SRC_RV) SUB_ ## DST_R ## _ ## SRC_RV
/*******************************************************************************
Implementations (auto generated):
*******************************************************************************/
#define XX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x))
#define XXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x))
#define XXXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) X(BYTE_0xFF000000(x))
foreachq(register, `registers', `hashtag(define) PUSH_`'register`' assemble(`push register')
')
#define PUSH_BYTE(x) X(0x6a) X(x) Y("push " #x) /* 6A ib: push an 8-bit immediate */
#define PUSH_WORD(x) X(0x68) XXXX(x) Y("push " #x) /* 68 id: push a 32-bit immediate, least significant byte first */
foreachq(register, `registers', `hashtag(define) POP_`'register`' assemble(`pop register')
')
foreachq(registera, `registers', `
foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) MOV_`'registera`'_`'registerb`' assemble(`mov registera`,'registerb')
)')')
foreachq(register, `registers', `hashtag(define) MOV_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`mov 'register`, 0'), 8)) XXXX(x) Y("mov register," hashtag(x))
')
foreachq(registera, `registers', `
foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) ADD_`'registera`'_`'registerb`' assemble(`add registera`,'registerb')
)')')
foreachq(register, `registers', `hashtag(define) ADD_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`add 'register`, 0x12345678'), 8)) XXXX(x) Y("add register," hashtag(x))
')
foreachq(registera, `registers', `
foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) SUB_`'registera`'_`'registerb`' assemble(`sub registera`,'registerb')
)')')
foreachq(register, `registers', `hashtag(define) SUB_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`sub 'register`, 0x12345678'), 8)) XXXX(x) Y("sub register," hashtag(x))
')
#endif /* X86_64_H */

3
src/architectures.c Normal file
View file

@ -0,0 +1,3 @@
#include "architectures.h"
#include "arch/x86_64.h"

8
src/architectures.h Normal file
View file

@ -0,0 +1,8 @@
#ifndef ARCHITECTURES_H
#define ARCHITECTURES_H
enum Architecture {
ARCH_NATIVE
};
#endif /* ARCHITECTURES_H */

View file

@ -1,122 +0,0 @@
#include "arctic.h"
#include <string.h>
const char ARCTIC_CODE_PAGE[97] =
"0123456789ABCDEF"
"GHIJKLMNOPQRSTUV"
"WXYZabcdefghijkl"
"mnopqrstuvwxyz_."
"+-*/%\\|$@<{=}>,;"
"&^![]?~:()'\"` #\n"
;
/* ops with an immediate argument */
#define FOR_IMMEDIATE_OPS(X) \
X('0') X('G') X('H') X('L') X('M') X('P') X('Q') X('d') X('o') X('u') \
X('<') X('{') X('=') X('}') X('>') X(',') X(';') X('[') X(']')
#define FOR_PLAIN_OPS(X) \
X('1') X('2') X('3') X('4') X('5') X('6') X('7') X('8') X('9') X('A') \
X('B') X('C') X('D') X('E') X('F') X('I') X('J') X('K') X('N') X('O') \
X('R') X('S') X('U') X('V') X('W') X('X') X('Y') X('Z') X('a') X('b') \
X('c') X('e') X('f') X('g') X('h') X('i') X('j') X('k') X('l') X('m') \
X('n') X('p') X('q') X('r') X('s') X('t') X('v') X('w') X('x') X('y') \
X('z') X('_') X('.') X('+') X('-') X('*') X('/') X('%') X('\\') X('|') \
X('&') X('^') X('!') X('?') X('(') X(')') X('`')
#define CASE(X, ...) case X:
/* Append the character chr to the NUL-terminated string str, which lives in a
 * buffer of n bytes.
 *
 * When the buffer has no room left for both chr and the terminator (or holds
 * no terminator at all within n bytes), nothing is written and the statement
 * passed as fail is executed; callers pass a return statement here.
 *
 * str and n are evaluated more than once. */
#define chrncat(str, chr, n, fail) do { \
    char *last = (str); \
    /* bounds check first so the byte past the buffer is never read */ \
    while (last < (str) + (n) && *last) { \
        last++; \
    } \
    /* two bytes are needed: one for chr, one for the terminator */ \
    if (last + 1 >= (str) + (n)) { \
        fail; \
    } else { \
        last[0] = (chr); \
        last[1] = 0; \
    } \
} while (0)
/* Feed one character to the scanner.
 *
 * The first byte of scanner->buf is the current state: 0 means idle, any other
 * value is the character that opened the token being accumulated, and the
 * bytes after it hold the text collected so far. Completed tokens are reported
 * through the callbacks in *scanner, which receive the collected text
 * (scanner->buf + 1) and scanner->data.
 *
 * Returns ARCTIC_OK when the character was consumed, ARCTIC_UNEXPECTED_CHAR
 * when an idle scanner sees a character it does not know,
 * ARCTIC_BUFFER_FULL when chrncat reports that the token does not fit in
 * ARCTIC_BUFSIZE bytes, and ARCTIC_INVALID_STATE when buf[0] holds a value
 * that is not a known state. */
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next) {
switch (scanner->buf[0]) {
case 0: /* initial state */
switch (next) {
case ' ':
/* a space between tokens is skipped */
return ARCTIC_OK;
FOR_IMMEDIATE_OPS(CASE)
case '#':
case ':':
case '\"':
case '\'':
/* start of a multi-character token: remember the opening character as the
 * state and start with empty text */
scanner->buf[0] = next;
scanner->buf[1] = 0;
return ARCTIC_OK;
FOR_PLAIN_OPS(CASE)
case '\n':
/* single-character operation: reported immediately, with no immediate */
scanner->op_callback(next, 0, scanner->data);
return ARCTIC_OK;
default:
return ARCTIC_UNEXPECTED_CHAR;
}
case '#': /* comment */
/* everything up to and including the newline is dropped; the newline that
 * ends a comment is not reported as an operation */
if (next == '\n')
scanner->buf[0] = 0;
return ARCTIC_OK;
case '"': /* section switch */
/* the section name ends at the closing double quote */
if (next == '"') {
scanner->section_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
case ':': /* label name */
/* the label ends at the next space; any other character, including a
 * newline, becomes part of the name */
if (next == ' ') {
scanner->label_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
case '\'': /* data injection */
/* the data text ends at the closing apostrophe */
if (next == '\'') {
scanner->data_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
FOR_IMMEDIATE_OPS(CASE) /* immediate ops */
/* the immediate ends at the next space; the opcode is the state character */
if (next == ' ') {
scanner->op_callback(scanner->buf[0], scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
default:
return ARCTIC_INVALID_STATE;
}
}

View file

@ -1,59 +0,0 @@
/* ARCTIC library header. To be used as a utility by interpreters and compilers.
* Author: Louis A. Burke
*
* Does not require dynamic memory or a c standard library, so as to be easy to
* use on e.g. microcontrollers.
*/
#ifndef ARCTIC_H
#define ARCTIC_H
#ifndef ARCTIC_BUFSIZE
#define ARCTIC_BUFSIZE 1024
#endif /* ARCTIC_BUFSIZE */
/* encoding/decoding */
extern const char ARCTIC_CODE_PAGE[97];
/* scanning */
enum ArcticImmediateKind {
ARCTIC_NONE, ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER
};
struct ArcticScanner {
void *data; /* callback data pointer */
void (*section_callback)(
const char *name, /* the name of the section */
void *data /* callback data */
);
void (*label_callback)(
const char *id, /* the identifier itself */
void *data /* callback data */
);
void (*op_callback)(
char opcode, /* the character code of the operation */
const char *im, /* the immediate value, if it exists */
void *data /* callback data */
);
void (*data_callback)(
const char *init, /* initialization code */
void *data /* callback data */
);
char buf[ARCTIC_BUFSIZE];
};
enum ArcticErrorCode {
ARCTIC_OK = 0,
ARCTIC_UNEXPECTED_CHAR, /* not necessarily an error */
ARCTIC_INVALID_STATE,
ARCTIC_BUFFER_FULL
};
/* returns 0 on success, or an error code */
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next);
#endif /* ARCTIC_H */

25
src/args.c Normal file
View file

@ -0,0 +1,25 @@
#include "args.h"
#include <argp.h>
/* argp parser callback.
 *
 * Placeholder: no key is acted upon yet, so every option and argument is
 * accepted and ignored, and 0 is returned unconditionally. */
static int parse_opt(int key, char *arg, struct argp_state *state) {
    (void)arg;
    (void)state;

    switch (key) {
    default:
        break;
    }

    return 0;
}
/* Run argp over the command line.
 *
 * args is handed to argp as the parser input; argc and argv are the values
 * received by main. Returns whatever argp_parse returns. */
int parse_arguments(struct Arguments *args, int argc, char *argv[]) {
    struct argp_option options[] = {
        {
            .name = "interpret",
            .key = 'i',
            .doc = "Interpret the executable, instead of jitting it",
        },
        {
            .name = "jit",
            .key = 'j',
            .arg = "ARCH",
            .flags = OPTION_ARG_OPTIONAL,
            .doc = "JIT the executable with instructions for ARCH [default: native]",
        },
        {
            .name = "asm",
            .key = 'a',
            .arg = "ARCH",
            .flags = OPTION_ARG_OPTIONAL,
            .doc = "Assemble the executable with instructions for ARCH [default: native]",
        },
        { 0 } /* terminator */
    };
    struct argp argp = {
        .options = options,
        .parser = parse_opt,
        .args_doc = "PROGRAM",
    };

    /* no flags, no end-index output; args is passed through as the input */
    return argp_parse(&argp, argc, argv, 0, 0, args);
}

32
src/args.h Normal file
View file

@ -0,0 +1,32 @@
#ifndef ARGS_H
#define ARGS_H
#include "architectures.h"
#include <stdio.h>
/* What to do with the program.
 * NOTE(review): the names mirror the interpret / jit / asm options declared in
 * args.c; confirm the mapping once option handling is implemented. */
enum Behaviour {
BEHAVIOUR_INTERPRET,
BEHAVIOUR_JIT,
BEHAVIOUR_ASM
};
/* Command-line settings, handed to parse_arguments(). */
struct Arguments {
enum Behaviour behaviour; /* selected mode */
FILE *program; /* presumably the PROGRAM argument as an open stream; TODO confirm */
/* Per-behaviour settings. Only the assemble variant has a field so far; the
 * empty struct bodies are accepted by GCC as an extension, ISO C requires at
 * least one member. */
union {
struct {
} interpret;
struct {
} jit;
struct {
enum Architecture arch; /* target architecture */
} assemble;
};
};
int parse_arguments(struct Arguments *args, int argc, char *argv[]);
#endif /* ARGS_H */

7
src/jit.c Normal file
View file

@ -0,0 +1,7 @@
#include "jit.h"
#ifdef __x86_64__
#include "jit/x86_64.c"
#else
#include "jit/interpret.c"
#endif /* architecture */

7
src/jit.h Normal file
View file

@ -0,0 +1,7 @@
#ifndef JIT_H
#define JIT_H
/* the implementations must implement this function */
int (*jit(const char *src))(int, char**);
#endif /* JIT_H */

12
src/jit/interpret.c Normal file
View file

@ -0,0 +1,12 @@
/* Source text most recently passed to jit(). */
static const char *arctic_src;

/* Entry point handed out by jit(). Placeholder: ignores its arguments and
 * returns 0. */
static int arctic_main(int argc, char **argv) {
    (void)argc;
    (void)argv;
    /* TODO: interpret the code in arctic_src */
    return 0;
}

/* Fallback implementation of jit(): remembers src and returns the interpreter
 * entry point. */
int (*jit(const char *src))(int, char**) {
    int (*entry)(int, char **) = arctic_main;

    /* TODO: instead of interpreting straight from src,
     * compile structure first and compile from that. */
    arctic_src = src;
    return entry;
}

27
src/jit/x86_64.c Normal file
View file

@ -0,0 +1,27 @@
#include <stdint.h>
#include "x86_64.h"
/*
push rbp
mov rbp, rsp
sub rsp, 16
mov DWORD PTR [rbp-4], edi
mov QWORD PTR [rbp-16], rsi
mov rdx, QWORD PTR [rbp-16]
mov eax, DWORD PTR [rbp-4]
mov rsi, rdx
mov edi, eax
call foo
leave
ret
*/
const uint8_t prelude[] = {
// PUSH(RBP)
// MOV(RBP,RSP)
};
/* x86-64 implementation of jit().
 *
 * Not implemented yet: returns a null function pointer so that callers can
 * detect the missing implementation instead of receiving an indeterminate
 * value. */
int (*jit(const char *src))(int, char**) {
    /* TODO: see https://github.com/spencertipping/jit-tutorial */
    (void)src;
    return 0; /* null function pointer */
}

19
src/jit/x86_64.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef X86_64_H
#define X86_64_H
/*******************************************************************************
Types:
R: RAX, RCX, RDX, RBX, R8, R9, R10, R11, R12, R13, R14, R15
V: BYTE(X), WORD(X)
Q: QWORD(X)
*******************************************************************************/
/* Each macro pastes its operand tokens onto the mnemonic to select the
 * matching per-operand macro, e.g. MOV(RBP,RSP) becomes MOV_RBP_RSP.
 * Operands are given destination first, as in the assembly text. */
#define PUSH(RV) PUSH_ ## RV
#define POP(R) POP_ ## R
#define MOV(DST,SRC) MOV_ ## DST ## _ ## SRC
/* TODO: sub(R, RV) */
// #include "x86_64_opcodes.h"
#endif /* X86_64_H */

44
src/jit/x86_64_opcodes.h Normal file
View file

@ -0,0 +1,44 @@
#ifndef X86_64_OPCODES_H
#define X86_64_OPCODES_H
#include "../utils.h"
/* for list creation, other uses can override X() */
#define X(x) (x),
/* Multi-byte values, emitted least significant byte first. This is the order
 * x86 uses for immediates; it is NOT suitable for prefix + opcode sequences,
 * which must be written out byte by byte in instruction order. */
#define XX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x))
#define XXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x))
#define XXXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) X(BYTE_0xFF000000(x))
/* push r64 is 50+r; R8-R15 need the REX.B prefix (0x41) in front of the opcode */
#define PUSH_RAX X(0x50)
#define PUSH_RCX X(0x51)
#define PUSH_RDX X(0x52)
#define PUSH_RBX X(0x53)
#define PUSH_RBP X(0x55)
#define PUSH_R8 X(0x41) X(0x50)
#define PUSH_R9 X(0x41) X(0x51)
#define PUSH_R10 X(0x41) X(0x52)
#define PUSH_R11 X(0x41) X(0x53)
#define PUSH_R12 X(0x41) X(0x54)
#define PUSH_R13 X(0x41) X(0x55)
#define PUSH_R14 X(0x41) X(0x56)
#define PUSH_R15 X(0x41) X(0x57)
/* push imm8 (6A ib) and push imm32 (68 id) */
#define PUSH_BYTE(x) X(0x6a) X(x)
#define PUSH_WORD(x) X(0x68) XXXX(x)
/* pop r64 is 58+r; R8-R15 need the REX.B prefix (0x41) in front of the opcode */
#define POP_RAX X(0x58)
#define POP_RCX X(0x59)
#define POP_RDX X(0x5a)
#define POP_RBX X(0x5b)
#define POP_R8 X(0x41) X(0x58)
#define POP_R9 X(0x41) X(0x59)
#define POP_R10 X(0x41) X(0x5a)
#define POP_R11 X(0x41) X(0x5b)
#define POP_R12 X(0x41) X(0x5c)
#define POP_R13 X(0x41) X(0x5d)
#define POP_R14 X(0x41) X(0x5e)
#define POP_R15 X(0x41) X(0x5f)
#include "./x86_64_mov_opcodes.h"
#endif /* X86_64_OPCODES_H */

14
src/main.c Normal file
View file

@ -0,0 +1,14 @@
#include <stdio.h>
#include <argp.h>
#include "jit.h"
#include "args.h"
#include "architectures.h"
int main(int argc, char *argv[]) {
struct Arguments args;
int rc = parse_arguments(&args, argc, argv);
printf("%d\n", rc);
return 0;
}

9
src/utils.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef UTILS_H
#define UTILS_H
/* BYTE_<mask>(x): the byte of x that <mask> selects, moved down to bits 0-7. */
#define BYTE_0xFF(x) ((x) & 0xFF)
#define BYTE_0xFF00(x) (((x) >> 8) & 0xFF)
#define BYTE_0xFF0000(x) (((x) >> 16) & 0xFF)
#define BYTE_0xFF000000(x) (((x) >> 24) & 0xFF)
#endif /* UTILS_H */

10
tools/x86_64dump Executable file
View file

@ -0,0 +1,10 @@
#!/bin/bash
# Assemble 64-bit NASM source read from stdin and print the machine code as a
# plain hex dump on stdout. Any arguments are passed through to nasm.
# When assembling fails, the offending source is reported on stderr and the
# script exits with a non-zero status.

# Without pipefail the pipeline below reports only the status of xxd, so a
# failing nasm would go unnoticed.
set -o pipefail

tmp="$(mktemp)" || exit 1
# Remove the scratch file however the script ends.
trap 'rm -f "$tmp"' EXIT

echo '[bits 64]' >"$tmp"
cat >>"$tmp"
if ! nasm "$@" "$tmp" -o /dev/stdout | xxd -p; then
    >&2 echo "Failed to assemble: $(cat "$tmp")"
    exit 1
fi

6
tools/x86dump Executable file
View file

@ -0,0 +1,6 @@
#!/bin/bash
# Assemble NASM source read from stdin and print the machine code as a plain
# hex dump on stdout. Any arguments are passed through to nasm.
# The exit status is non-zero when either nasm or xxd fails.

# Without pipefail the pipeline below reports only the status of xxd.
set -o pipefail

tmp="$(mktemp)" || exit 1
# Remove the scratch file however the script ends.
trap 'rm -f "$tmp"' EXIT

cat >"$tmp"
nasm "$@" "$tmp" -o /dev/stdout | xxd -p