diff --git a/.gitignore b/.gitignore index f86f5b2..7d2119e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,19 @@ htmldocs acdump src/arctic/handle_syscalls.nim syscalls.json +obj/ -# Documentation artefacts -doc/documentation.html -doc/documentation.tex -doc/documentation.pdf +# Generated sources +src/arch/x86_64.h + +# Documentation artifacts +documentation.html +documentation.tex +documentation.pdf +overview.html +overview.tex +overview.pdf +cheatsheet.pdf # LaTeX garbage *.log diff --git a/Makefile b/Makefile index d541bee..540c94d 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,91 @@ .PHONY: default +.SUFFIXES: +MAKEFLAGS+=--no-builtin-rules -default: docs +NODEPS=clean + +OBJECT_DIR=obj +SOURCE_DIR=src + +EMPTY:= +SPACE:=$(EMPTY) $(EMPTY) +COMMA:=, + +default: all +all: docs arctic DOCUMENTS=$(shell find doc/ -name '*.md') -HTMLDOCS=$(patsubst %.md,%.html,$(DOCUMENTS)) -PDFDOCS=$(patsubst %.md,%.pdf,$(DOCUMENTS)) +HTMLDOCS=$(patsubst doc/%.md,%.html,$(DOCUMENTS)) +PDFDOCS=$(patsubst doc/%.md,%.pdf,$(DOCUMENTS)) cheatsheet.pdf docs: $(HTMLDOCS) $(PDFDOCS) -%.html: %.md +%.html: doc/%.md multimarkdown -t html $^ -o $@ %.tex: %.md - multimarkdown -t latex $^ -o $@ + multimarkdown -t latex $^ | sed 's/^\[/{[}/g' > $@ -%.pdf: %.tex doc/arctic-leader.tex doc/arctic-begin.tex doc/arctic-footer.tex - cd $$(dirname $<) && xelatex -interaction=batchmode $< +%.pdf: doc/%.tex doc/arctic-leader.tex doc/arctic-begin.tex doc/arctic-footer.tex + # cd doc && latexmk -pdf -xelatex -use-make $*.tex && mv $*.pdf ../ + cd $$(dirname $<) && xelatex -interaction=batchmode $*.tex && mv $*.pdf ../ + +GENERATED_M4=$(shell find $(SOURCE_DIR)/ -name '*.m4') +GENERATED_FS=$(patsubst %.m4,%,$(GENERATED_M4)) + +%: %.m4 misc/utils.m4 + m4 $< > $@ + +# TODO: fix the structure so this is easier. the JIT libs should become ASM +# libs, while the actual JIT sources only get included on whatever the native +# arch is (tunable with ARCH=... 
in make) +MAIN_SOURCES:=$(shell find $(SOURCE_DIR)/ -name '*.c' -not -path '$(SOURCE_DIR)/jit/*') +MAIN_OBJECTS=$(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.o,$(MAIN_SOURCES)) +MAIN_DEPENDS=$(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.d,$(MAIN_SOURCES)) + +ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS)))) +ifeq (,$(findstring print-,$(MAKECMDGOALS))) +$(info Including depends files: $(MAIN_DEPENDS)) +include $(MAIN_DEPENDS) +endif +endif + +$(OBJECT_DIR): + mkdir -p $@ + +$(OBJECT_DIR)/%.d: $(SOURCE_DIR)/%.c | $(OBJECT_DIR) $(GENERATED_FS) + $(CC) $(CFLAGS) -MM -MT '$(patsubst $(SOURCE_DIR)/%.c,$(OBJECT_DIR)/%.o,$<)' $< -MF $@ + +$(OBJECT_DIR)/%.o: $(SOURCE_DIR)/%.c | $(OBJECT_DIR) + $(CC) $(CFLAGS) -c $< -o $@ + +arctic: $(MAIN_OBJECTS) + $(CC) $(LDFLAGS) $^ -o $@ + +X86_64_BIT_REGISTERS=RAX RBX RCX RDX RSI RDI RBP RSP R8 R9 R10 R11 R12 R13 R14 R15 +src/jit/x86_64_mov_opcodes.h: + echo '#ifndef X86_64_MOV_OPCODES_H' > $@ + echo '#define X86_64_MOV_OPCODES_H' >> $@ + echo >> $@ + for dstreg in $(X86_64_BIT_REGISTERS); do \ + for srcreg in $(X86_64_BIT_REGISTERS); do \ + echo "mov $$srcreg,$$dstreg" | ./tools/x86_64dump | \ + sed 's/../ X(0x&)/g' | \ + sed "s/.*/#define MOV_$${srcreg}_$${dstreg}&/" >> $@; \ + done; \ + done + echo >> $@ + echo '#endif /* X86_64_MOV_OPCODES_H */' >> $@ + + +clean: + $(RM) $(patsubst doc/%.md,doc/%.aux,$(DOCUMENTS)) + $(RM) $(patsubst doc/%.md,doc/%.xdv,$(DOCUMENTS)) + $(RM) $(patsubst doc/%.md,doc/%.out,$(DOCUMENTS)) + $(RM) $(MAIN_OBJECTS) + $(RM) $(MAIN_DEPENDS) + $(RM) arctic print-%: @echo '$*=$($*)' diff --git a/doc/cheatsheet.tex b/doc/cheatsheet.tex new file mode 100644 index 0000000..35f2a6c --- /dev/null +++ b/doc/cheatsheet.tex @@ -0,0 +1,116 @@ +\documentclass{article} + +\usepackage[letterpaper, landscape, margin=1cm]{geometry} + +\usepackage{tabularx} +\usepackage{tabulary} +\usepackage{booktabs} +\usepackage{tikz} + +\usetikzlibrary{shapes.multipart,positioning} + 
+\tikzstyle{stack}=[rectangle split, rectangle split parts=#1,draw,anchor=center] + +\newenvironment{opsummary}[2]{% +\begin{tikzpicture} +\newcommand{\opsummaryshort}{#1} +\newcommand{\opsummarylong}{#2} +}{% +\draw[->, double distance=1pt] (before) -- node[above] {\texttt{\opsummaryshort}} node[below] {\texttt{\opsummarylong}} (after); +\end{tikzpicture} +} + +\begin{document} + +\begin{tabulary}{\textwidth}{@{}cccccccccccccccc@{}} \toprule + \begin{opsummary}{0}{BNEZ} + \node[stack=2] (before) {% + \nodepart{one}$x$ + \nodepart{two}\ldots + }; + \node[stack=1,right=of before] (after) {% + \nodepart{one}\ldots + }; + \end{opsummary} + & + + \begin{opsummary}{1}{LDAB} + \node[stack=2] (before) {% + \nodepart{one}$p$ + \nodepart{two}… + }; + \node[stack=2,right=of before] (after) {% + \nodepart{one}$x$ + \nodepart{two}… + }; + \end{opsummary} + & + + \begin{opsummary}{2}{LDAS} + \node[stack=2] (before) {% + \nodepart{one}$p$ + \nodepart{two}… + }; + \node[stack=2,right=of before] (after) {% + \nodepart{one}$x$ + \nodepart{two}… + }; + \end{opsummary} + & + + \begin{opsummary}{3}{LDAI} + \node[stack=2] (before) {% + \nodepart{one}$p$ + \nodepart{two}… + }; + \node[stack=2,right=of before] (after) {% + \nodepart{one}$x$ + \nodepart{two}… + }; + \end{opsummary} + & + + \begin{opsummary}{4}{LDAW} + \node[stack=2] (before) {% + \nodepart{one}$p$ + \nodepart{two}… + }; + \node[stack=2,right=of before] (after) {% + \nodepart{one}$x$ + \nodepart{two}… + }; + \end{opsummary} + & + + 5 & + 6 & + 7 & + 8 & + 9 & + A & + B & + C & + D & + E & + F \\ + + G & + H & + I & + J & + K & + L & + M & + N & + O & + P & + Q & + R & + S & + T & + U & + V \\ + +\end{tabulary} + +\end{document} diff --git a/doc/documentation.md b/doc/documentation.md index 02e218b..ce95bb0 100644 --- a/doc/documentation.md +++ b/doc/documentation.md @@ -2,7 +2,7 @@ Title: ARCTIC Documentation Author: Louis A. 
Burke Language: en -CSS: documentation.css +CSS: doc/documentation.css LaTeX Leader: arctic-leader.tex LaTeX Begin: arctic-begin.tex LaTeX Footer: arctic-footer.tex @@ -15,10 +15,25 @@ LaTeX Header Level: 3 Like the JVM, but the "binaries" consist of printable ASCII characters and the result can be more easily precompiled for native execution. Also far less safety -and verification, feel free to shoot yourself in the foot! +and verification, feel free to shoot yourself in the foot. + +## Behaviour + +The behaviour of ARCTIC code is described by how a theoretical virtual machine +would interpret it. Any compiler or interpreter that produces the same series of +external syscalls with the same values and returns the same result as the +theoretical virtual machine is considered a compliant ARCTIC implementation. + +This documentation describes the input as a "file", but it actually may consist +of multiple files or may be processed from standard input. How an ARCTIC +implementation consumes its input is up to each specific implementation, but +should be well documented. ## Structure +ARCTIC code is divided into sections. In each section, each character represents +a different operation. + ### Data Types ### The Stack @@ -33,9 +48,10 @@ CODE is BE / LDAx Specifically: + .* is shared file load (" libc.so") A.* is BE atomic data a.* is LE atomic data -[bB].* is BSS data +[bB].* is BSS data (uninitialized, but allocated) C.* is BE code c.* is LE code D.* is BE data @@ -46,14 +62,33 @@ e.* is LE externals [gG].* is embedded resource ("get X") [hH].* is embedded resource ("http...") [iI].* is embedded code ("include X") +[jJ].* is embedded java ("jvm X") +[kK].* is ??? [lL].* is embedded arctic file ("load X") M.* is BE macros m.* is LE macros +[nN].* is ??? 
+O.* is BE register-based code ("OPTIMIZED") +o.* is LE register-based code ("optimized") P.* is BE per-thread data (POOL) p.* is LE per-thread data (pool) +Q.* is BE quantum code ("QUANTUM") +q.* is LE quantum code ("quantum") R.* is BE read-only data r.* is LE read-only data +[sS].* is ??? +[tT].* is ??? +[uU].* is ??? +[vV].* is ??? [wW].* is load module ("with X") +[xX].* is ??? +[yY].* is ??? +[zZ].* is ??? + +Need better description of data '' format. See QBE's data for inspiration? +Consider having each op-code mean something different in different sections? +Only consistency would be labels... those could be more universally parsed? +Alternatively remove sections altogether and use only dynamic memory? ``` ## File Format @@ -123,12 +158,12 @@ Q |LDOW| … p| ⇒ | … x | load word at p plus immediate o 6 |STAS| … p x| → | … | store short x to p 7 |STAI| … p x| → | … | store int x to p 8 |STAW| … p x| → | … | store word x to p -D |MCLR| … p x| → | … | memclear x bytes from p -d |MSET| … p x| ⇒ | … | set x bytes of memory to immediate value at p -K |MCPY| … p q x| → | … | copy x bytes of memory from p to q +D†|MCLR| … p x| → | … | memclear x bytes from p +d†|MSET| … p x| ⇒ | … | set x bytes of memory to immediate value at p +K†|MCPY| … p q x| → | … | copy x bytes of memory from p to q J |MOFF| … p x| → | … q | set q to memory pointer p shifted by x bytes -M |MALL| … p| ⇒ | … p | (re)allocate/free memory for immediate bytes at p -R |REAL| … p x| → | … p | (re)allocate/free memory for x bytes at p +M†|MALL| … p| ⇒ | … p | (re)allocate/free memory for immediate bytes at p +R†|REAL| … p x| → | … p | (re)allocate/free memory for x bytes at p 9 |CASS| … p x y| → | … z | compare and swap x and y at p, return success #### LDAx diff --git a/doc/overview.md b/doc/overview.md new file mode 100644 index 0000000..4c8fd0c --- /dev/null +++ b/doc/overview.md @@ -0,0 +1,26 @@ +--- +Title: ARCTIC Documentation +Author: Louis A. 
Burke +Language: en +CSS: documentation.css +LaTeX Leader: arctic-leader.tex +LaTeX Begin: arctic-begin.tex +LaTeX Footer: arctic-footer.tex +LaTeX Header Level: 3 +--- + +# ARCTIC Code + + ASCII Rendered, Cross-Target Invocation Character Codes + +An executable "binary" format consisting of only ASCII characters. + +## Goals + +The priorities of this project are: + + - Reasonable performance + - Easy to compile to + - Reasonable to hand-write + - Reasonable to debug + - Possible to interface to native environments diff --git a/misc/utils.m4 b/misc/utils.m4 new file mode 100644 index 0000000..47c37e9 --- /dev/null +++ b/misc/utils.m4 @@ -0,0 +1,50 @@ +divert(`-1')dnl +#changequote(`[', `]')dnl + +define(`TICK', changequote([,])[changequote([,])'changequote(`,')]changequote(`,')) + +# forloop(var, from, to, stmt) - improved version: +# works even if VAR is not a strict macro name +# performs sanity check that FROM is larger than TO +# allows complex numerical expressions in TO and FROM +define(`forloop', `ifelse(eval(`($2) <= ($3)'), `1', + `pushdef(`$1')_$0(`$1', eval(`$2'), + eval(`$3'), `$4')popdef(`$1')')') +define(`_forloop', + `define(`$1', `$2')$4`'ifelse(`$2', `$3', `', + `$0(`$1', incr(`$2'), `$3', `$4')')') + +# foreachq(x, `item_1, item_2, ..., item_n', stmt) +# quoted list, version based on forloop +define(`foreachq', +`ifelse(`$2', `', `', `_$0(`$1', `$3', $2)')') +define(`_foreachq', +`pushdef(`$1', forloop(`$1', `3', `$#', + `$0_(`1', `2', indir(`$1'))')`popdef( + `$1')')indir(`$1', $@)') +define(`_foreachq_', +``define(`$$1', `$$3')$$2`''') + +# foreach(x, (item_1, item_2, ..., item_n), stmt) +# parenthesized list, improved version +define(`foreach', `pushdef(`$1')_$0(`$1', + (dquote(dquote_elt$2)), `$3')popdef(`$1')') +define(`_arg1', `$1') +define(`_foreach', `ifelse(`$2', `(`')', `', + `define(`$1', _arg1$2)$3`'$0(`$1', (dquote(shift$2)), `$3')')') + +# hextox(x) +# hexadecimal number to series of X(0xZZ)X(0xZZ)... 
+define(`hextox', `patsubst(`$1', `..', `X(0x\&)')') + +# hashtag(x) +# expands to #x +define(`hashtag', `changecom()dnl +#$1`'dnl +changecom(`#')') + +# rtrimn(x, n) +# expands to all but the last n characters of x +define(`rtrimn', `substr($1, 0, eval(len($1)-$2))') + +divert(`0')dnl diff --git a/samples/asm.arx b/samples/asm.arx new file mode 100644 index 0000000..0c61a09 --- /dev/null +++ b/samples/asm.arx @@ -0,0 +1,11 @@ +#!/usr/bin/env arctic + +# This is a basic ARCTIC assembler, written in ARCTIC. + +@main + +"data" # read-only memory +:opcodes '"BNEZLDABLDASLDAILDAWSTABSTASSTAISTAWCASSPUTAPUTBPUTCMCLRFTOIBMISLDOILDOSPUTIMOFFMCPYROLLMALLPUTNITOCPICKLDOWREALITOSPUSHCMPUBIORITOWPUTXPUTYPUTZGETAGETBGETCMSETSROTFMODFINVOVERGETIFNEGSPOPBITCFSUBGETNLDOBFMULFDIVFREMFADDSWAPUSHRFABSSDUPGETXGETYGETZINEGITOFIADDISUBIMULIDIVIMODIREMIABSVALUCALLBLTZBLEZBEQZBGEZBGTZJUMPRETNBANDBXORBNOTROTRBSHRCMPICMPFLABLDPTHPACKDATASECTBIFCNOOPBEAT"' + +"state" # initialized read-write memory + diff --git a/share/vim/ftdetect/arctic.vim b/share/vim/ftdetect/arctic.vim new file mode 100644 index 0000000..c247647 --- /dev/null +++ b/share/vim/ftdetect/arctic.vim @@ -0,0 +1,5 @@ +" Description: ARCTIC ftdetect file +" Language: ARCTIC (2024) +" Maintainer: Louis Burke + +autocmd BufRead,BufNewFile *.arx set filetype=arctic diff --git a/share/vim/syntax/arctic.vim b/share/vim/syntax/arctic.vim new file mode 100644 index 0000000..8c15143 --- /dev/null +++ b/share/vim/syntax/arctic.vim @@ -0,0 +1,33 @@ +" Description: ARCTIC syntax file +" Language: ARCTIC (2024) +" Maintainer: Louis Burke + +if exists("b:current_syntax") || version < 700 + finish +endif + +let b:current_syntax = "arctic" + +syntax match arcticComment "\v#.*$" +highlight link arcticComment Comment + +syntax region arcticData start=/'/ end=/'/ +highlight link arcticData Character + +syntax region arcticSection start=/"/ end=/"/ +highlight link arcticSection PreProc + +syntax match arcticLabel "\v:[^ ]*" +highlight link 
arcticLabel Label + +syntax match arcticCall "\v\@[^ ]*" +highlight link arcticCall Function + +syntax match arcticConstant "\v\$[^ ]*" +highlight link arcticConstant Constant + +syntax match arcticImmediate "\v[][0EFGHLMPQTdou$@<{=}>,`][^ ]*" +highlight link arcticImmediate Statement + +syntax match arcticOperator "\v[-123456789ABCDIJKNORSUVWXYZabcefghijklmnpqrstvwxyz_.+*/%\|;&^!?()]" +highlight link arcticOperator Operator diff --git a/src/arch/x86_64.h.m4 b/src/arch/x86_64.h.m4 new file mode 100644 index 0000000..bc42ee8 --- /dev/null +++ b/src/arch/x86_64.h.m4 @@ -0,0 +1,68 @@ +#ifndef X86_64_H +#define X86_64_H + +#include "../utils.h" + +/******************************************************************************* +include(`misc/utils.m4')dnl +divert(`-1')dnl + +define(`registers', `RAX, RCX, RDX, RBX, R8, R9, R10, R11, R12, R13, R14, R15') + +define(`x86_encoding_of', `translit(esyscmd(`echo 'TICK`$1'TICK` | ./tools/x86_64dump'),` +')') +define(`assemble', `hextox(x86_encoding_of($1)) Y("$1")') + +divert(`0')dnl + +Types: + +R: registers +V: BYTE(X), WORD(X) +Q: QWORD(X) + +To use these macros, either define X(x) as (x), to make lists of bytes, or +define Y(s) as whatever you want to do with a string literal. Keep the other +defined as an empty expansion. 
+ +*******************************************************************************/ + +#define PUSH(A_RV) PUSH_ ## A_RV +#define POP(A_R) POP_ ## A_R +#define MOV(DST_R,SRC_RV) MOV_ ## DST_R ## _ ## SRC_RV +#define ADD(DST_R,SRC_RV) ADD_ ## DST_R ## _ ## SRC_RV +#define SUB(DST_R,SRC_RV) SUB_ ## DST_R ## _ ## SRC_RV + +/******************************************************************************* +Implementations (auto generated): +*******************************************************************************/ + +#define XX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) +#define XXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) +#define XXXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) X(BYTE_0xFF000000(x)) +foreachq(register, `registers', `hashtag(define) PUSH_`'register`' assemble(`push register') +') +#define PUSH_BYTE(x) X(0x6a) X(x) Y("push " #x) +#define PUSH_WORD(x) X(0x86) XXXX(x) Y("push " #x) + +foreachq(register, `registers', `hashtag(define) POP_`'register`' assemble(`pop register') +') +foreachq(registera, `registers', ` +foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) MOV_`'registera`'_`'registerb`' assemble(`mov registera`,'registerb') +)')') +foreachq(register, `registers', `hashtag(define) MOV_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`mov 'register`, 0'), 8)) XXXX(x) Y("mov register," hashtag(x)) +') + +foreachq(registera, `registers', ` +foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) ADD_`'registera`'_`'registerb`' assemble(`add registera`,'registerb') +)')') +foreachq(register, `registers', `hashtag(define) ADD_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`add 'register`, 0x12345678'), 8)) XXXX(x) Y("add register," hashtag(x)) +') + +foreachq(registera, `registers', ` +foreachq(registerb, `registers', `ifelse(registera,registerb,,hashtag(define) SUB_`'registera`'_`'registerb`' assemble(`sub registera`,'registerb') +)')') +foreachq(register, `registers', 
`hashtag(define) SUB_`'register`'_WORD(x) hextox(rtrimn(x86_encoding_of(`sub 'register`, 0x12345678'), 8)) XXXX(x) Y("sub register," hashtag(x)) +') + +#endif /* X86_64_H */ diff --git a/src/architectures.c b/src/architectures.c new file mode 100644 index 0000000..457315b --- /dev/null +++ b/src/architectures.c @@ -0,0 +1,3 @@ +#include "architectures.h" + +#include "arch/x86_64.h" diff --git a/src/architectures.h b/src/architectures.h new file mode 100644 index 0000000..468ddc1 --- /dev/null +++ b/src/architectures.h @@ -0,0 +1,8 @@ +#ifndef ARCHITECTURES_H +#define ARCHITECTURES_H + +enum Architecture { + ARCH_NATIVE +}; + +#endif /* ARCHITECTURES_H */ diff --git a/src/args.c b/src/args.c new file mode 100644 index 0000000..e67fbbb --- /dev/null +++ b/src/args.c @@ -0,0 +1,25 @@ +#include "args.h" + +#include <argp.h> + +static int parse_opt(int key, char *arg, struct argp_state *state) { + switch (key) { + + } + + return 0; +} + +int parse_arguments(struct Arguments *args, int argc, char *argv[]) { + struct argp_option options[] = { + { "interpret", 'i', 0, 0, "Interpret the executable, instead of jitting it", 0 }, + { "jit", 'j', "ARCH", OPTION_ARG_OPTIONAL, "JIT the executable with instructions for ARCH [default: native]", 0 }, + { "asm", 'a', "ARCH", OPTION_ARG_OPTIONAL, "Assemble the executable with instructions for ARCH [default: native]", 0}, + { 0 } + }; + + const char *args_doc = "PROGRAM"; + + struct argp argp = { options, parse_opt, args_doc, 0, 0, 0 }; + return argp_parse(&argp, argc, argv, 0, 0, args); +} diff --git a/src/args.h b/src/args.h new file mode 100644 index 0000000..0bae1fb --- /dev/null +++ b/src/args.h @@ -0,0 +1,32 @@ +#ifndef ARGS_H +#define ARGS_H + +#include "architectures.h" + +#include <stdio.h> + +enum Behaviour { + BEHAVIOUR_INTERPRET, + BEHAVIOUR_JIT, + BEHAVIOUR_ASM +}; + +struct Arguments { + enum Behaviour behaviour; + FILE *program; + union { + struct { + } interpret; + + struct { + } jit; + + struct { + enum Architecture arch; + } assemble; + 
}; +}; + +int parse_arguments(struct Arguments *args, int argc, char *argv[]); + +#endif /* ARGS_H */ diff --git a/src/jit.c b/src/jit.c new file mode 100644 index 0000000..e4f5ea0 --- /dev/null +++ b/src/jit.c @@ -0,0 +1,7 @@ +#include "jit.h" + +#ifdef __x86_64__ +#include "jit/x86_64.c" +#else +#include "jit/interpret.c" +#endif /* architecture */ diff --git a/src/jit.h b/src/jit.h new file mode 100644 index 0000000..496575c --- /dev/null +++ b/src/jit.h @@ -0,0 +1,7 @@ +#ifndef JIT_H +#define JIT_H + +/* the implementations must implement this function */ +int (*jit(const char *src))(int, char**); + +#endif /* JIT_H */ diff --git a/src/jit/interpret.c b/src/jit/interpret.c new file mode 100644 index 0000000..ab39230 --- /dev/null +++ b/src/jit/interpret.c @@ -0,0 +1,12 @@ +static const char *arctic_src; +static int arctic_main(int argc, char **argv) { + // TODO: interpret the code in arctic_src + + return 0; +} + +int (*jit(const char *src))(int, char**) { + arctic_src = src; // TODO: instead of interpreting straight from src, + // compile structure first and compile from that. 
+ return arctic_main; +} diff --git a/src/jit/x86_64.c b/src/jit/x86_64.c new file mode 100644 index 0000000..c8da3f8 --- /dev/null +++ b/src/jit/x86_64.c @@ -0,0 +1,27 @@ +#include <stdint.h> + +#include "x86_64.h" + +/* + push rbp + mov rbp, rsp + sub rsp, 16 + mov DWORD PTR [rbp-4], edi + mov QWORD PTR [rbp-16], rsi + mov rdx, QWORD PTR [rbp-16] + mov eax, DWORD PTR [rbp-4] + mov rsi, rdx + mov edi, eax + call foo + leave + ret +*/ +const uint8_t prelude[] = { + // PUSH(RBP) + // MOV(RBP,RSP) +}; + +int (*jit(const char *src))(int, char**) { + return 0; // stub: nothing is generated yet, so hand back a null entry point. TODO: see https://github.com/spencertipping/jit-tutorial +} + diff --git a/src/jit/x86_64.h b/src/jit/x86_64.h new file mode 100644 index 0000000..2072c6c --- /dev/null +++ b/src/jit/x86_64.h @@ -0,0 +1,19 @@ +#ifndef X86_64_H +#define X86_64_H + +/******************************************************************************* +Types: + +R: RAX, RCX, RDX, RBX, R8, R9, R10, R11, R12, R13, R14, R15 +V: BYTE(X), WORD(X) +Q: QWORD(X) +*******************************************************************************/ + +#define PUSH(RV) PUSH_ ## RV +#define POP(R) POP_ ## R +#define MOV(SRC,DST) MOV_ ## SRC ## _ ## DST +/* TODO: sub(R, RV) */ + +// #include "x86_64_opcodes.h" + +#endif /* X86_64_H */ diff --git a/src/jit/x86_64_opcodes.h b/src/jit/x86_64_opcodes.h new file mode 100644 index 0000000..65ed5ac --- /dev/null +++ b/src/jit/x86_64_opcodes.h @@ -0,0 +1,44 @@ +#ifndef X86_64_OPCODES_H +#define X86_64_OPCODES_H + +#include "../utils.h" + +/* for list creation, other uses can override X() */ +#define X(x) (x), + +#define XX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) +#define XXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) +#define XXXX(x) X(BYTE_0xFF(x)) X(BYTE_0xFF00(x)) X(BYTE_0xFF0000(x)) X(BYTE_0xFF000000(x)) + +#define PUSH_RAX X(0x50) +#define PUSH_RCX X(0x51) +#define PUSH_RDX X(0x52) +#define PUSH_RBX X(0x53) +#define PUSH_RBP X(0x55) +#define PUSH_R8 X(0x41) X(0x50) +#define PUSH_R9 X(0x41) X(0x51) +#define PUSH_R10 
X(0x41) X(0x52) +#define PUSH_R11 X(0x41) X(0x53) +#define PUSH_R12 X(0x41) X(0x54) +#define PUSH_R13 X(0x41) X(0x55) +#define PUSH_R14 X(0x41) X(0x56) +#define PUSH_R15 X(0x41) X(0x57) +#define PUSH_BYTE(x) X(0x6a) X(x) +#define PUSH_WORD(x) X(0x68) XXXX(x) + +#define POP_RAX X(0x58) +#define POP_RCX X(0x59) +#define POP_RDX X(0x5a) +#define POP_RBX X(0x5b) +#define POP_R8 X(0x41) X(0x58) +#define POP_R9 X(0x41) X(0x59) +#define POP_R10 X(0x41) X(0x5a) +#define POP_R11 X(0x41) X(0x5b) +#define POP_R12 X(0x41) X(0x5c) +#define POP_R13 X(0x41) X(0x5d) +#define POP_R14 X(0x41) X(0x5e) +#define POP_R15 X(0x41) X(0x5f) + +#include "./x86_64_mov_opcodes.h" + +#endif /* X86_64_OPCODES_H */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..2a1ad29 --- /dev/null +++ b/src/main.c @@ -0,0 +1,14 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "jit.h" +#include "args.h" +#include "architectures.h" + +int main(int argc, char *argv[]) { + struct Arguments args; + int rc = parse_arguments(&args, argc, argv); + + printf("%d\n", rc); + return 0; +} diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..c5f559d --- /dev/null +++ b/src/utils.h @@ -0,0 +1,9 @@ +#ifndef UTILS_H +#define UTILS_H + +#define BYTE_0xFF(x) ((x) & 0xFF) +#define BYTE_0xFF00(x) BYTE_0xFF((x) >> 8) +#define BYTE_0xFF0000(x) BYTE_0xFF((x) >> 16) +#define BYTE_0xFF000000(x) BYTE_0xFF((x) >> 24) + +#endif /* UTILS_H */ diff --git a/tools/x86_64dump b/tools/x86_64dump new file mode 100755 index 0000000..94ee62f --- /dev/null +++ b/tools/x86_64dump @@ -0,0 +1,10 @@ +#!/bin/bash +set -o pipefail +tmp="$(mktemp)" +echo '[bits 64]' >"$tmp" +cat >>"$tmp" +if ! nasm "$@" "$tmp" -o /dev/stdout | xxd -p; then + >&2 echo "Failed to assemble: $(cat "$tmp")" +fi +rm "$tmp" + diff --git a/tools/x86dump b/tools/x86dump new file mode 100755 index 0000000..ee1744c --- /dev/null +++ b/tools/x86dump @@ -0,0 +1,6 @@ +#!/bin/bash + +tmp="$(mktemp)" +cat >"$tmp" +nasm "$@" "$tmp" -o /dev/stdout | xxd -p +rm "$tmp"