diff --git a/doc/documentation.md b/doc/documentation.md index 5315c60..3d83196 100644 --- a/doc/documentation.md +++ b/doc/documentation.md @@ -8,6 +8,18 @@ result can be more easily precompiled for native execution. ```{=todo} Notate that immediates can be: …###, …###.###, …$name, or …v (for abcinxyz) for most operations that take immediates. + +Sections, a la ELF. Probably want: + + - code + - macros (ephemeral "code") + - constants (ephemeral "data") + - data (read-only memory) + - state (initialized memory) + - memory (uninitialized memory) + +The ops below are probably code exclusive. The remaining " and ' can switch +modes? Maybe just "section" to start a section? Leaving ' as the only impl-def? ``` ### Summary @@ -17,7 +29,7 @@ for most operations that take immediates. 000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI` 001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE` 010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN` -011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T ????`|`U CMPU`|`V B_OR` +011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T PUSH`|`U CMPU`|`V B_OR` 100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET` 101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC` 110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP` @@ -25,11 +37,11 @@ for most operations that take immediates. 200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU` 201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN` 210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? 
CMPI`|`~ CMPF`|`: LABL` -211-|`( DPTH`|`) PACK`|`' IMP1`|`" IMP2`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT` +211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BKPT`|`␣ NOOP`|`# COMM`|`¶ BEAT` [Operations by code][ops-by-code] In this table x, y, and z are integers; a, b, and c are floating point numbers; -p and q are memory pointers; and f is a function pointer. Additionally +p and q are memory pointers; and f is a function pointer. Additionally, → indicates a basic operation, while ⇒ indicates an operation using an immediate value, and ↔ indicates a meta operation. @@ -64,7 +76,7 @@ P PICK| … x ‥ z ⇒ … x ‥ z x | pick stack element at immediate place Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x R MALL| … x → … p | allocate memory for x bytes at p S ITOS| … x → … x | truncate x to 16 bits, then sign extend -T ????| | reserved for future instruction +T PUSH| … x . y z ⇒ … y x ‥ z | push top of stack down by immediate places (opposite of ROLL) U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0 V B_OR| … x y → … z | bitwise OR x and y and store in z W ITOW| … x → … x | truncate x to 32 bits, then sign extend @@ -125,8 +137,8 @@ $ VALU| … ↔ … * | load constant value \: LABL|… f ↔ … | label code location ( DPTH| … → … x | set x to depth of stack (before x) ) PACK| … p x → … | pack x elements of stack (before p) into array p -' IMP1| | implementation defined reserved operation 1 (no immediate) -" IMP2| | implementation defined reserved operation 2 (immediate) +' DATA| | embed data, given as the text up to the closing ' +" SECT| | change section, named by the text up to the closing " \` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging ␣ NOOP| … ↔ … | do nothing, maybe end identifier definition ¶ BEAT| … ↔ … | mark a beat for relative branching diff --git a/samples/syntax.atc b/samples/syntax.atc new file mode 100644 index 0000000..bc924ca --- /dev/null +++ b/samples/syntax.atc @@ -0,0 +1,26 @@ +#!/usr/bin/env arctic + +"data" # read-only memory +:value '10' # 1 int +:array '[1, 2, 3]' # 3 int array +
+"state" # initialized read-write memory +:variable '10' # 1 int +:buffer '[1, 2, 3]' # 3 int array + +"memory" # uninitialized read-write memory +:pool '{10}' # 10 byte pool + +"constants" # ephemeral data +:const '10' # 1 universal integer + +"macros" # ephemeral code +:foo +# body of foo here + +"code" # loaded code +:bar +# body of bar here + +:_baz # private label, not "exported" +# body of baz here diff --git a/src/arctic.c b/src/arctic.c index ca75424..94b3afd 100644 --- a/src/arctic.c +++ b/src/arctic.c @@ -2,66 +2,121 @@ #include -int arctic_max_expanded_size(int compressed_size) { - /* at most every 6th byte is free */ - return (6 * compressed_size) / 5 + 1; -} +const char ARCTIC_CODE_PAGE[97] = /* 96 characters in six rows of 16, plus the terminating NUL */ + "0123456789ABCDEF" + "GHIJKLMNOPQRSTUV" + "WXYZabcdefghijkl" + "mnopqrstuvwxyz_." + "+-*/%\\|$@<{=}>,;" + "&^![]?~:()'\"` #\n" +; -int arctic_max_compressed_size(int expanded_size) { - return (5 * expanded_size) / 6 + 1; -} +/* ops with an immediate argument; includes T (PUSH), documented as taking an immediate like L (ROLL) */ +#define FOR_IMMEDIATE_OPS(X) \ + X('0') X('G') X('H') X('L') X('M') X('P') X('Q') X('T') X('d') X('o') \ + X('u') X('<') X('{') X('=') X('}') X('>') X(',') X(';') X('[') X(']') -void arctic_expand(char *expanded, const char *compressed) { - // TODO -} +#define FOR_PLAIN_OPS(X) /* ops without an immediate argument: reported to op_callback as soon as they are read */ \ + X('1') X('2') X('3') X('4') X('5') X('6') X('7') X('8') X('9') X('A') \ + X('B') X('C') X('D') X('E') X('F') X('I') X('J') X('K') X('N') X('O') \ + X('R') X('S') X('U') X('V') X('W') X('X') X('Y') X('Z') X('a') X('b') \ + X('c') X('e') X('f') X('g') X('h') X('i') X('j') X('k') X('l') X('m') \ + X('n') X('p') X('q') X('r') X('s') X('t') X('v') X('w') X('x') X('y') \ + X('z') X('_') X('.') X('+') X('-') X('*') X('/') X('%') X('\\') X('|') \ + X('&') X('^') X('!') X('?') X('(') X(')') X('`') -void arctic_compress(char *compressed, const char *expanded) { - // TODO -} +#define CASE(X, ...)
case X: -static const char *parse_immediate(const char *expanded, struct ArcticOperation *op) { - return expanded; // TODO -} +#define chrncat(str, chr, n, fail) do { /* append chr to the NUL-terminated string in the n-byte array str; run fail when chr plus its terminator would not fit */ \ + char *last; \ + for (last = (str); last < (str) + (n) - 1 && *last; last++) { /* find the terminator without reading past the array */ \ + } \ + if (last >= (str) + (n) - 1) { /* the last byte must stay free for the new terminator */ \ + fail; \ + } \ + *last++ = (chr); \ + *last = 0; \ +} while (0) -void arctic_normalize(struct ArcticOperation *ops, const char *expanded) { - const char *o = expanded; - struct ArcticOperation *op = ops; +enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next) { /* Feeds one character to the scanner. scanner->buf[0] selects the state: 0 is idle, any other value is the pending op, comment, label, section or data marker, and the text collected for it follows in buf. Callbacks fire when a token completes. Returns ARCTIC_OK, ARCTIC_UNEXPECTED_CHAR for a character that starts nothing, ARCTIC_INVALID_STATE for an unknown buf[0], or ARCTIC_BUFFER_FULL when the pending text would exceed ARCTIC_BUFSIZE. */ + switch (scanner->buf[0]) { + case 0: /* initial state */ + switch (next) { + case ' ': + return ARCTIC_OK; - *op = (struct ArcticOperation){ 0 }; + FOR_IMMEDIATE_OPS(CASE) + case '#': + case ':': + case '\"': + case '\'': + scanner->buf[0] = next; + scanner->buf[1] = 0; + return ARCTIC_OK; - while (*o) { - int opcode = *o++; + FOR_PLAIN_OPS(CASE) + case '\n': + scanner->op_callback(next, 0, scanner->data); + return ARCTIC_OK; - switch (*o) { - /* ops with an immediate */ - case '0': case 'G': case 'H': case 'L': case 'M': case 'P': - case 'Q': case 'd': case 'o': case 'u': case '<': case '{': - case '=': case '}': case '>': case ',': case ';': case '[': - case ']': case '"': - o = parse_immediate(o, op); + default: + return ARCTIC_UNEXPECTED_CHAR; + } - /* ops without an immediate */ - case '1': case '2': case '3': case '4': case '5': case '6': - case '7': case '8': case '9': case 'A': case 'B': case 'C': - case 'D': case 'E': case 'F': case 'I': case 'J': case 'K': - case 'N': case 'O': case 'R': case 'S': case 'U': case 'V': - case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': - case 'c': case 'e': case 'f': case 'g': case 'h': case 'i': - case 'j': case 'k': case 'l': case 'm': case 'n': case 'p': - case 'q': case 'r': case 's': case 't': case 'v': case 'w': - case 'x': case 'y': case 'z': case '_': case '.': case '+': - case '-': case '*': case '/': case '%': case '\\': case '|': - case '&': case '^': case '!': case '?': 
case '(': case ')': - op->opcode = opcode; - *++op = (struct ArcticOperation){ 0 }; - break; + case '#': /* comment: discard everything up to the end of the line */ + if (next == '\n') + scanner->buf[0] = 0; + return ARCTIC_OK; - /* unusual ops */ - case '$': case '@': case ':': case '`': case ' ': case '\n': - case '\'': + case '"': /* section switch: collect the name up to the closing quote */ + if (next == '"') { + scanner->section_callback(scanner->buf + 1, scanner->data); + scanner->buf[0] = 0; + } else { + chrncat( + scanner->buf, next, ARCTIC_BUFSIZE, + return ARCTIC_BUFFER_FULL + ); + } + return ARCTIC_OK; - default: /* non-active characters */ - break; - } + case ':': /* label name, ended by ' ' only; NOTE(review): a newline is appended to the name rather than ending it - confirm this is intended */ + if (next == ' ') { + scanner->label_callback(scanner->buf + 1, scanner->data); + scanner->buf[0] = 0; + } else { + chrncat( + scanner->buf, next, ARCTIC_BUFSIZE, + return ARCTIC_BUFFER_FULL + ); + } + return ARCTIC_OK; + + case '\'': /* data injection: collect the text up to the closing quote */ + if (next == '\'') { + scanner->data_callback(scanner->buf + 1, scanner->data); + scanner->buf[0] = 0; + } else { + chrncat( + scanner->buf, next, ARCTIC_BUFSIZE, + return ARCTIC_BUFFER_FULL + ); + } + return ARCTIC_OK; + + FOR_IMMEDIATE_OPS(CASE) /* immediate ops: collect the immediate text up to the next ' ' */ + if (next == ' ') { + scanner->op_callback(scanner->buf[0], scanner->buf + 1, scanner->data); + scanner->buf[0] = 0; + } else { + chrncat( + scanner->buf, next, ARCTIC_BUFSIZE, + return ARCTIC_BUFFER_FULL + ); + } + return ARCTIC_OK; + + default: + return ARCTIC_INVALID_STATE; } } diff --git a/src/arctic.h b/src/arctic.h index 36e9ba5..6e043e1 100644 --- a/src/arctic.h +++ b/src/arctic.h @@ -1,41 +1,59 @@ /* ARCTIC library header. To be used as a utility by interpreters and compilers. * Author: Louis A. Burke + * + * Does not require dynamic memory or a c standard library, so as to be easy to + * use on e.g. microcontrollers. 
*/ #ifndef ARCTIC_H #define ARCTIC_H -#include +#ifndef ARCTIC_BUFSIZE +#define ARCTIC_BUFSIZE 1024 /* default size in bytes of ArcticScanner.buf; define before including this header to override */ +#endif /* ARCTIC_BUFSIZE */ /* encoding/decoding */ -int arctic_max_expanded_size(int compressed_size); -int arctic_max_compressed_size(int expanded_size); -void arctic_expand(char *expanded, const char *compressed); -void arctic_compress(char *compressed, const char *expanded); +extern const char ARCTIC_CODE_PAGE[97]; -/* normalization */ -struct ArcticIdentifier { - const char *start; /* a pointer into the input, or null */ - int length; +/* scanning */ +enum ArcticImmediateKind { + ARCTIC_NONE, ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER }; -enum ArcticImmediateKind { ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER }; +struct ArcticScanner { + void *data; /* callback data pointer, passed unchanged to every callback */ -struct ArcticImmediate { - enum ArcticImmediateKind kind; + void (*section_callback)( + const char *name, /* the name of the section */ + void *data /* callback data */ + ); - union { - struct ArcticIdentifier name; - int64_t integer; - double number; - }; + void (*label_callback)( + const char *id, /* the identifier itself */ + void *data /* callback data */ + ); + + void (*op_callback)( + char opcode, /* the character code of the operation */ + const char *im, /* the immediate text, or 0 when the op takes none */ + void *data /* callback data */ + ); + + void (*data_callback)( + const char *init, /* initialization code */ + void *data /* callback data */ + ); + + char buf[ARCTIC_BUFSIZE]; /* scan state: arctic_scan treats buf[0] == 0 as idle; otherwise buf[0] is the pending marker and the NUL-terminated text collected so far follows it */ }; -struct ArcticOperation { - struct ArcticIdentifier label; - struct ArcticImmediate immediate; - int opcode; /* won't be beat or nop or comm */ +enum ArcticErrorCode { + ARCTIC_OK = 0, + ARCTIC_UNEXPECTED_CHAR, /* not necessarily an error */ + ARCTIC_INVALID_STATE, /* buf[0] holds a value arctic_scan does not recognise */ + ARCTIC_BUFFER_FULL /* the pending text no longer fits in buf */ }; -void arctic_normalize(struct ArcticOperation *ops, const char *expanded); +/* returns 0 on success, or an error code */ +enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next); #endif /* ARCTIC_H */