Wrote basic scanning library

This commit is contained in:
Louis Burke 2023-09-07 15:13:42 -04:00
parent e52878c5e2
commit 85cc1fc97f
4 changed files with 189 additions and 78 deletions

View file

@ -8,6 +8,18 @@ result can be more easily precompiled for native execution.
```{=todo}
Document that, for most operations that take immediates, the immediate can be
written as …###, …###.###, …$name, or …v (where v is one of a, b, c, i, n, x, y, z).
Sections, a la ELF. Probably want:
- code
- macros (ephemeral "code")
- constants (ephemeral "data")
- data (read-only memory)
- state (initialized memory)
- memory (uninitialized memory)
The ops below are probably code exclusive. The remaining " and ' can switch
modes? Maybe just "section" to start a section? Leaving ' as the only impl-def?
```
### Summary
@ -17,7 +29,7 @@ for most operations that take immediates.
000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI`
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE`
010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN`
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T ????`|`U CMPU`|`V B_OR`
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T PUSH`|`U CMPU`|`V B_OR`
100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET`
101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC`
110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP`
@ -25,11 +37,11 @@ for most operations that take immediates.
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU`
201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN`
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
211-|`( DPTH`|`) PACK`|`' IMP1`|`" IMP2`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT`
211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BKPT`|`␣ NOOP`|`# COMM`|`¶ BEAT`
[Operations by code][ops-by-code]
In this table x, y, and z are integers; a, b, and c are floating point numbers;
p and q are memory pointers; and f is a function pointer. Additionally
p and q are memory pointers; and f is a function pointer. Additionally,
→ indicates a basic operation, while ⇒ indicates an operation using an immediate
value, and ↔ indicates a meta operation.
@ -64,7 +76,7 @@ P PICK| … x ‥ z ⇒ … x ‥ z x | pick stack element at immediate place
Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x
R MALL| … x → … p | allocate memory for x bytes at p
S ITOS| … x → … x | truncate x to 16 bits, then sign extend
T ????| | reserved for future instruction
T PUSH| … x . y z ⇒ … y x ‥ z | push top of stack down by immediate places (opposite of ROLL)
U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0
V B_OR| … x y → … z | bitwise OR x and y and store in z
W ITOW| … x → … x | truncate x to 32 bits, then sign extend
@ -125,8 +137,8 @@ $ VALU| … ↔ … * | load constant value
\: LABL|… f ↔ … | label code location
( DPTH| … → … x | set x to depth of stack (before x)
) PACK| … p x → … | pack x elements of stack (before p) into array p
' IMP1| | implementation defined reserved operation 1 (no immediate)
" IMP2| | implementation defined reserved operation 2 (immediate)
' DATA| | embed the data written between this ' and the next '
" SECT| | switch to the section named between this " and the next "
\` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging
␣ NOOP| … ↔ … | do nothing, maybe end identifier definition
¶ BEAT| … ↔ … | mark a beat for relative branching

26
samples/syntax.atc Normal file
View file

@ -0,0 +1,26 @@
#!/usr/bin/env arctic
"data" # read-only memory
:value '10' # 1 int
:array '[1, 2, 3]' # 3 int array
"state" # initialized read-write memory
:variable '10' # 1 int
:buffer '[1, 2, 3]' # 3 int array
"memory" # uninitialized read-write memory
:pool '{10}' # 10 byte pool
"constants" # ephemeral data
:const '10' # 1 universal integer
"macros" # ephemeral code
:foo
# body of foo here
"code" # loaded code
:bar
# body of bar here
:_baz # private label, not "exported"
# body of baz here

View file

@ -2,66 +2,121 @@
#include <string.h>
/* Upper bound on the buffer size needed to expand `compressed_size` bytes of
 * compressed input: six expanded bytes for every five compressed ones (at
 * most every 6th byte is free), plus one.
 *
 * Written as c + c/5 + 1, which yields the same value as (6 * c) / 5 + 1 but
 * never forms the product 6 * c, so it cannot overflow `int` unless the
 * result itself is out of range. */
int arctic_max_expanded_size(int compressed_size) {
    return compressed_size + compressed_size / 5 + 1;
}
/* The 96 characters of the ARCTIC code page in code order, eight per literal,
 * followed by the terminating NUL of the string (97 elements in total). */
const char ARCTIC_CODE_PAGE[97] =
    "01234567" "89ABCDEF"
    "GHIJKLMN" "OPQRSTUV"
    "WXYZabcd" "efghijkl"
    "mnopqrst" "uvwxyz_."
    "+-*/%\\|$" "@<{=}>,;"
    "&^![]?~:" "()'\"` #\n";
/* Upper bound on the buffer size needed to compress `expanded_size` bytes of
 * expanded input: five compressed bytes for every six expanded ones, plus one.
 *
 * The size is split into whole groups of six and a remainder so that the
 * product 5 * expanded_size is never formed; the result equals
 * (5 * expanded_size) / 6 + 1 without the intermediate `int` overflow. */
int arctic_max_compressed_size(int expanded_size) {
    const int groups = expanded_size / 6;
    const int rest = expanded_size % 6;

    return groups * 5 + (rest * 5) / 6 + 1;
}
/* ops with an immediate argument
 *
 * X-macro: expands X(c) once for each opcode character that is followed by an
 * immediate value. 'T' (PUSH) is listed because it pushes the top of the
 * stack down by an immediate number of places; without an entry here the
 * scanner treats 'T' as an unexpected character. */
#define FOR_IMMEDIATE_OPS(X) \
    X('0') X('G') X('H') X('L') X('M') X('P') X('Q') X('T') X('d') X('o') \
    X('u') X('<') X('{') X('=') X('}') X('>') X(',') X(';') X('[') X(']')
/* Placeholder: not implemented yet. Returns immediately without reading
 * `compressed` or writing to `expanded`. */
void arctic_expand(char *expanded, const char *compressed) {
    /* TODO */
    (void)expanded;
    (void)compressed;
}
/* X-macro: expands X(c) once for each opcode character that is scanned as a
 * stand-alone operation (no immediate follows). Entries are grouped by
 * character class; their order is significant only to users of X that care.
 *
 * NOTE(review): '~' is in ARCTIC_CODE_PAGE but in neither this list nor
 * FOR_IMMEDIATE_OPS -- confirm whether that omission is intentional. */
#define FOR_PLAIN_OPS(X) \
    X('1') X('2') X('3') X('4') X('5') X('6') X('7') X('8') X('9') \
    X('A') X('B') X('C') X('D') X('E') X('F') \
    X('I') X('J') X('K') X('N') X('O') X('R') X('S') X('U') X('V') \
    X('W') X('X') X('Y') X('Z') \
    X('a') X('b') X('c') X('e') X('f') X('g') X('h') X('i') X('j') \
    X('k') X('l') X('m') X('n') X('p') X('q') X('r') X('s') X('t') \
    X('v') X('w') X('x') X('y') X('z') \
    X('_') X('.') X('+') X('-') X('*') X('/') X('%') X('\\') X('|') \
    X('&') X('^') X('!') X('?') X('(') X(')') X('`')
/* Placeholder: not implemented yet. Returns immediately without reading
 * `expanded` or writing to `compressed`. */
void arctic_compress(char *compressed, const char *expanded) {
    /* TODO */
    (void)compressed;
    (void)expanded;
}
/* Turns one X-macro entry into a `case` label; any further arguments are
 * accepted and ignored. */
#define CASE(opcode, ...) case opcode:
/* Placeholder for immediate parsing: consumes no input, leaves `op` untouched
 * and returns the position it was given. TODO: implement. */
static const char *parse_immediate(const char *expanded, struct ArcticOperation *op) {
    const char *rest = expanded; /* TODO */

    (void)op;
    return rest;
}
/* Append the single character `chr` to the NUL-terminated string stored in
 * `str`, a buffer of `n` bytes in total.
 *
 *   str   buffer holding the string; evaluated once
 *   chr   character to append; evaluated at most once
 *   n     total capacity of the buffer in bytes, terminator included
 *   fail  statement executed instead of appending when there is no room for
 *         both `chr` and the terminating NUL (or when no NUL is found within
 *         the first n - 1 bytes); typically a `return`
 *
 * The scan never looks beyond index n - 2 and the terminator is never written
 * beyond index n - 1, so the buffer is not overrun. If `fail` does not leave
 * the enclosing code, the buffer is left unchanged. */
#define chrncat(str, chr, n, fail) do { \
    char *const chrncat_base = (str); \
    const long chrncat_room = (long)(n) - 1; \
    long chrncat_len = 0; \
    while (chrncat_len < chrncat_room && chrncat_base[chrncat_len]) { \
        chrncat_len++; \
    } \
    if (chrncat_len >= chrncat_room) { \
        fail; \
    } else { \
        chrncat_base[chrncat_len] = (chr); \
        chrncat_base[chrncat_len + 1] = 0; \
    } \
} while (0)
void arctic_normalize(struct ArcticOperation *ops, const char *expanded) {
const char *o = expanded;
struct ArcticOperation *op = ops;
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next) {
switch (scanner->buf[0]) {
case 0: /* initial state */
switch (next) {
case ' ':
return ARCTIC_OK;
*op = (struct ArcticOperation){ 0 };
FOR_IMMEDIATE_OPS(CASE)
case '#':
case ':':
case '\"':
case '\'':
scanner->buf[0] = next;
scanner->buf[1] = 0;
return ARCTIC_OK;
while (*o) {
int opcode = *o++;
FOR_PLAIN_OPS(CASE)
case '\n':
scanner->op_callback(next, 0, scanner->data);
return ARCTIC_OK;
switch (*o) {
/* ops with an immediate */
case '0': case 'G': case 'H': case 'L': case 'M': case 'P':
case 'Q': case 'd': case 'o': case 'u': case '<': case '{':
case '=': case '}': case '>': case ',': case ';': case '[':
case ']': case '"':
o = parse_immediate(o, op);
default:
return ARCTIC_UNEXPECTED_CHAR;
}
/* ops without an immediate */
case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': case 'A': case 'B': case 'C':
case 'D': case 'E': case 'F': case 'I': case 'J': case 'K':
case 'N': case 'O': case 'R': case 'S': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b':
case 'c': case 'e': case 'f': case 'g': case 'h': case 'i':
case 'j': case 'k': case 'l': case 'm': case 'n': case 'p':
case 'q': case 'r': case 's': case 't': case 'v': case 'w':
case 'x': case 'y': case 'z': case '_': case '.': case '+':
case '-': case '*': case '/': case '%': case '\\': case '|':
case '&': case '^': case '!': case '?': case '(': case ')':
op->opcode = opcode;
*++op = (struct ArcticOperation){ 0 };
break;
case '#': /* comment */
if (next == '\n')
scanner->buf[0] = 0;
return ARCTIC_OK;
/* unusual ops */
case '$': case '@': case ':': case '`': case ' ': case '\n':
case '\'':
case '"': /* section switch */
if (next == '"') {
scanner->section_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
default: /* non-active characters */
break;
}
case ':': /* label name */
if (next == ' ') {
scanner->label_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
case '\'': /* data injection */
if (next == '\'') {
scanner->data_callback(scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
FOR_IMMEDIATE_OPS(CASE) /* immediate ops */
if (next == ' ') {
scanner->op_callback(scanner->buf[0], scanner->buf + 1, scanner->data);
scanner->buf[0] = 0;
} else {
chrncat(
scanner->buf, next, ARCTIC_BUFSIZE,
return ARCTIC_BUFFER_FULL
);
}
return ARCTIC_OK;
default:
return ARCTIC_INVALID_STATE;
}
}

View file

@ -1,41 +1,59 @@
/* ARCTIC library header. To be used as a utility by interpreters and compilers.
* Author: Louis A. Burke
*
* Does not require dynamic memory or a c standard library, so as to be easy to
* use on e.g. microcontrollers.
*/
#ifndef ARCTIC_H
#define ARCTIC_H
#include <stdint.h>
#ifndef ARCTIC_BUFSIZE
#define ARCTIC_BUFSIZE 1024
#endif /* ARCTIC_BUFSIZE */
/* encoding/decoding */
int arctic_max_expanded_size(int compressed_size);
int arctic_max_compressed_size(int expanded_size);
void arctic_expand(char *expanded, const char *compressed);
void arctic_compress(char *compressed, const char *expanded);
extern const char ARCTIC_CODE_PAGE[97];
/* normalization */
struct ArcticIdentifier {
const char *start; /* a pointer into the input, or null */
int length;
/* scanning */
enum ArcticImmediateKind {
ARCTIC_NONE, ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER
};
enum ArcticImmediateKind { ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER };
struct ArcticScanner {
void *data; /* callback data pointer */
struct ArcticImmediate {
enum ArcticImmediateKind kind;
void (*section_callback)(
const char *name, /* the name of the section */
void *data /* callback data */
);
union {
struct ArcticIdentifier name;
int64_t integer;
double number;
};
void (*label_callback)(
const char *id, /* the identifier itself */
void *data /* callback data */
);
void (*op_callback)(
char opcode, /* the character code of the operation */
const char *im, /* the immediate value, if it exists */
void *data /* callback data */
);
void (*data_callback)(
const char *init, /* initialization code */
void *data /* callback data */
);
char buf[ARCTIC_BUFSIZE];
};
struct ArcticOperation {
struct ArcticIdentifier label;
struct ArcticImmediate immediate;
int opcode; /* won't be beat or nop or comm */
/* Status codes returned by arctic_scan for each character it is fed. */
enum ArcticErrorCode {
/* the character was accepted */
ARCTIC_OK = 0,
/* the scanner was idle and the character starts nothing it recognizes */
ARCTIC_UNEXPECTED_CHAR, /* not necessarily an error */
/* the first byte of the scanner buffer is not a state the scanner knows */
ARCTIC_INVALID_STATE,
/* the pending token no longer fits in the scanner buffer */
ARCTIC_BUFFER_FULL
};
void arctic_normalize(struct ArcticOperation *ops, const char *expanded);
/* returns 0 on success, or an error code */
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next);
#endif /* ARCTIC_H */