Wrote basic scanning library

This commit is contained in:
Louis Burke 2023-09-07 15:13:42 -04:00
parent e52878c5e2
commit 85cc1fc97f
4 changed files with 189 additions and 78 deletions

View file

@ -8,6 +8,18 @@ result can be more easily precompiled for native execution.
```{=todo} ```{=todo}
Notate that immediates can be: …###, …###.###, …$name, or …v (for abcinxyz) Notate that immediates can be: …###, …###.###, …$name, or …v (for abcinxyz)
for most operations that take immediates. for most operations that take immediates.
Sections, a la ELF. Probably want:
- code
- macros (ephemeral "code")
- constants (ephemeral "data")
- data (read-only memory)
- state (initialized memory)
- memory (uninitialized memory)
The ops below are probably code-exclusive. Could the remaining " and ' switch
modes? Maybe just "section" to start a section, leaving ' as the only
implementation-defined op?
``` ```
### Summary ### Summary
@ -17,7 +29,7 @@ for most operations that take immediates.
000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI` 000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI`
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE` 001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE`
010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN` 010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN`
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T ????`|`U CMPU`|`V B_OR` 011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T PUSH`|`U CMPU`|`V B_OR`
100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET` 100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET`
101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC` 101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC`
110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP` 110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP`
@ -25,11 +37,11 @@ for most operations that take immediates.
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU` 200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU`
201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN` 201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN`
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL` 210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
211-|`( DPTH`|`) PACK`|`' IMP1`|`" IMP2`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT` 211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT`
[Operations by code][ops-by-code] [Operations by code][ops-by-code]
In this table x, y, and z are integers; a, b, and c are floating point numbers; In this table x, y, and z are integers; a, b, and c are floating point numbers;
p and q are memory pointers; and f is a function pointer. Additionally p and q are memory pointers; and f is a function pointer. Additionally,
→ indicates a basic operation, while ⇒ indicates an operation using an immediate → indicates a basic operation, while ⇒ indicates an operation using an immediate
value, and ↔ indicates a meta operation. value, and ↔ indicates a meta operation.
@ -64,7 +76,7 @@ P PICK| … x ‥ z ⇒ … x ‥ z x | pick stack element at immediate place
Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x
R MALL| … x → … p | allocate memory for x bytes at p R MALL| … x → … p | allocate memory for x bytes at p
S ITOS| … x → … x | truncate x to 16 bits, then sign extend S ITOS| … x → … x | truncate x to 16 bits, then sign extend
T ????| | reserved for future instruction T PUSH| … x . y z ⇒ … y x ‥ z | push top of stack down by immediate places (opposite of ROLL)
U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0 U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0
V B_OR| … x y → … z | bitwise OR x and y and store in z V B_OR| … x y → … z | bitwise OR x and y and store in z
W ITOW| … x → … x | truncate x to 32 bits, then sign extend W ITOW| … x → … x | truncate x to 32 bits, then sign extend
@ -125,8 +137,8 @@ $ VALU| … ↔ … * | load constant value
\: LABL|… f ↔ … | label code location \: LABL|… f ↔ … | label code location
( DPTH| … → … x | set x to depth of stack (before x) ( DPTH| … → … x | set x to depth of stack (before x)
) PACK| … p x → … | pack x elements of stack (before p) into array p ) PACK| … p x → … | pack x elements of stack (before p) into array p
' IMP1| | implementation defined reserved operation 1 (no immediate) ' DATA| | Embed data
" IMP2| | implementation defined reserved operation 2 (immediate) " SECT| | Change section
\` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging \` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging
␣ NOOP| … ↔ … | do nothing, maybe end identifier definition ␣ NOOP| … ↔ … | do nothing, maybe end identifier definition
¶ BEAT| … ↔ … | mark a beat for relative branching ¶ BEAT| … ↔ … | mark a beat for relative branching

26
samples/syntax.atc Normal file
View file

@ -0,0 +1,26 @@
#!/usr/bin/env arctic
"data" # read-only memory
:value '10' # 1 int
:array '[1, 2, 3]' # 3 int array
"state" # initialized read-write memory
:variable '10' # 1 int
:buffer '[1, 2, 3]' # 3 int array
"memory" # uninitialized read-write memory
:pool '{10}' # 10 byte pool
"constants" # ephemeral data
:const '10' # 1 universal integer
"macros" # ephemeral code
:foo
# body of foo here
"code" # loaded code
:bar
# body of bar here
:_baz # private label, not "exported"
# body of baz here

View file

@ -2,66 +2,121 @@
#include <string.h> #include <string.h>
int arctic_max_expanded_size(int compressed_size) { const char ARCTIC_CODE_PAGE[97] =
/* at most every 6th byte is free */ "0123456789ABCDEF"
return (6 * compressed_size) / 5 + 1; "GHIJKLMNOPQRSTUV"
} "WXYZabcdefghijkl"
"mnopqrstuvwxyz_."
"+-*/%\\|$@<{=}>,;"
"&^![]?~:()'\"` #\n"
;
int arctic_max_compressed_size(int expanded_size) { /* ops with an immediate argument */
return (5 * expanded_size) / 6 + 1; #define FOR_IMMEDIATE_OPS(X) \
} X('0') X('G') X('H') X('L') X('M') X('P') X('Q') X('d') X('o') X('u') \
X('<') X('{') X('=') X('}') X('>') X(',') X(';') X('[') X(']')
void arctic_expand(char *expanded, const char *compressed) { #define FOR_PLAIN_OPS(X) \
// TODO X('1') X('2') X('3') X('4') X('5') X('6') X('7') X('8') X('9') X('A') \
} X('B') X('C') X('D') X('E') X('F') X('I') X('J') X('K') X('N') X('O') \
X('R') X('S') X('U') X('V') X('W') X('X') X('Y') X('Z') X('a') X('b') \
X('c') X('e') X('f') X('g') X('h') X('i') X('j') X('k') X('l') X('m') \
X('n') X('p') X('q') X('r') X('s') X('t') X('v') X('w') X('x') X('y') \
X('z') X('_') X('.') X('+') X('-') X('*') X('/') X('%') X('\\') X('|') \
X('&') X('^') X('!') X('?') X('(') X(')') X('`')
void arctic_compress(char *compressed, const char *expanded) { #define CASE(X, ...) case X:
// TODO
}
static const char *parse_immediate(const char *expanded, struct ArcticOperation *op) { #define chrncat(str, chr, n, fail) do { \
return expanded; // TODO char *last; \
} for (last = str; *last; last++) { \
if (last >= str + n) { \
fail; \
} \
} \
*last++ = chr; \
*last = 0; \
} while (0)
void arctic_normalize(struct ArcticOperation *ops, const char *expanded) { enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next) {
const char *o = expanded; switch (scanner->buf[0]) {
struct ArcticOperation *op = ops; case 0: /* initial state */
switch (next) {
case ' ':
return ARCTIC_OK;
*op = (struct ArcticOperation){ 0 }; FOR_IMMEDIATE_OPS(CASE)
case '#':
case ':':
case '\"':
case '\'':
scanner->buf[0] = next;
scanner->buf[1] = 0;
return ARCTIC_OK;
while (*o) { FOR_PLAIN_OPS(CASE)
int opcode = *o++; case '\n':
scanner->op_callback(next, 0, scanner->data);
return ARCTIC_OK;
switch (*o) { default:
/* ops with an immediate */ return ARCTIC_UNEXPECTED_CHAR;
case '0': case 'G': case 'H': case 'L': case 'M': case 'P': }
case 'Q': case 'd': case 'o': case 'u': case '<': case '{':
case '=': case '}': case '>': case ',': case ';': case '[':
case ']': case '"':
o = parse_immediate(o, op);
/* ops without an immediate */ case '#': /* comment */
case '1': case '2': case '3': case '4': case '5': case '6': if (next == '\n')
case '7': case '8': case '9': case 'A': case 'B': case 'C': scanner->buf[0] = 0;
case 'D': case 'E': case 'F': case 'I': case 'J': case 'K': return ARCTIC_OK;
case 'N': case 'O': case 'R': case 'S': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b':
case 'c': case 'e': case 'f': case 'g': case 'h': case 'i':
case 'j': case 'k': case 'l': case 'm': case 'n': case 'p':
case 'q': case 'r': case 's': case 't': case 'v': case 'w':
case 'x': case 'y': case 'z': case '_': case '.': case '+':
case '-': case '*': case '/': case '%': case '\\': case '|':
case '&': case '^': case '!': case '?': case '(': case ')':
op->opcode = opcode;
*++op = (struct ArcticOperation){ 0 };
break;
/* unusual ops */ case '"': /* section switch */
case '$': case '@': case ':': case '`': case ' ': case '\n': if (next == '"') {
case '\'': scanner->section_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
default: /* non-active characters */ case ':': /* label name */
break; if (next == ' ') {
} scanner->label_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
case '\'': /* data injection */
if (next == '\'') {
scanner->data_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
FOR_IMMEDIATE_OPS(CASE) /* immediate ops */
if (next == ' ') {
scanner->op_callback(scanner->buf[0], scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
default:
return ARCTIC_INVALID_STATE;
} }
} }

View file

@ -1,41 +1,59 @@
/* ARCTIC library header. To be used as a utility by interpreters and compilers. /* ARCTIC library header. To be used as a utility by interpreters and compilers.
* Author: Louis A. Burke * Author: Louis A. Burke
*
* Does not require dynamic memory or a C standard library, so as to be easy to
* use on e.g. microcontrollers.
*/ */
#ifndef ARCTIC_H #ifndef ARCTIC_H
#define ARCTIC_H #define ARCTIC_H
#include <stdint.h> #ifndef ARCTIC_BUFSIZE
#define ARCTIC_BUFSIZE 1024
#endif /* ARCTIC_BUFSIZE */
/* encoding/decoding */ /* encoding/decoding */
int arctic_max_expanded_size(int compressed_size); extern const char ARCTIC_CODE_PAGE[97];
int arctic_max_compressed_size(int expanded_size);
void arctic_expand(char *expanded, const char *compressed);
void arctic_compress(char *compressed, const char *expanded);
/* normalization */ /* scanning */
struct ArcticIdentifier { enum ArcticImmediateKind {
const char *start; /* a pointer into the input, or null */ ARCTIC_NONE, ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER
int length;
}; };
enum ArcticImmediateKind { ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER }; struct ArcticScanner {
void *data; /* callback data pointer */
struct ArcticImmediate { void (*section_callback)(
enum ArcticImmediateKind kind; const char *name, /* the name of the section */
void *data /* callback data */
);
union { void (*label_callback)(
struct ArcticIdentifier name; const char *id, /* the identifier itself */
int64_t integer; void *data /* callback data */
double number; );
};
void (*op_callback)(
char opcode, /* the character code of the operation */
const char *im, /* the immediate value, if it exists */
void *data /* callback data */
);
void (*data_callback)(
const char *init, /* initialization code */
void *data /* callback data */
);
char buf[ARCTIC_BUFSIZE];
}; };
struct ArcticOperation { enum ArcticErrorCode {
struct ArcticIdentifier label; ARCTIC_OK = 0,
struct ArcticImmediate immediate; ARCTIC_UNEXPECTED_CHAR, /* not necessarily an error */
int opcode; /* won't be beat or nop or comm */ ARCTIC_INVALID_STATE,
ARCTIC_BUFFER_FULL
}; };
void arctic_normalize(struct ArcticOperation *ops, const char *expanded); /* returns 0 on success, or an error code */
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next);
#endif /* ARCTIC_H */ #endif /* ARCTIC_H */