Wrote basic scanning library
This commit is contained in:
parent
e52878c5e2
commit
85cc1fc97f
|
@ -8,6 +8,18 @@ result can be more easily precompiled for native execution.
|
|||
```{=todo}
|
||||
Notate that immediates can be: …###, …###.###, …$name, or …v (for abcinxyz)
|
||||
for most operations that take immediates.
|
||||
|
||||
Sections, a la ELF. Probably want:
|
||||
|
||||
- code
|
||||
- macros (ephemeral "code")
|
||||
- constants (ephemeral "data")
|
||||
- data (read-only memory)
|
||||
- state (initialized memory)
|
||||
- memory (uninitialized memory)
|
||||
|
||||
The ops below are probably code exclusive. The remaining " and ' can switch
|
||||
modes? Maybe just "section" to start a section? Leaving ' as the only impl-def?
|
||||
```
|
||||
|
||||
### Summary
|
||||
|
@ -17,7 +29,7 @@ for most operations that take immediates.
|
|||
000-|`0 BNEZ`|`1 LDAB`|`2 LDAS`|`3 LDAI`|`4 LDAW`|`5 STAB`|`6 STAS`|`7 STAI`
|
||||
001-|`8 STAW`|`9 CASS`|`A PUTA`|`B PUTB`|`C PUTC`|`D MCLR`|`E FTOI`|`F FREE`
|
||||
010-|`G LDOI`|`H LDOS`|`I PUTI`|`J MOFF`|`K MCPY`|`L ROLL`|`M MNEW`|`N PUTN`
|
||||
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T ????`|`U CMPU`|`V B_OR`
|
||||
011-|`O ITOC`|`P PICK`|`Q LDOW`|`R MALL`|`S ITOS`|`T PUSH`|`U CMPU`|`V B_OR`
|
||||
100-|`W ITOW`|`X PUTX`|`Y PUTY`|`Z PUTZ`|`a GETA`|`b GETB`|`c GETC`|`d MSET`
|
||||
101-|`e SROT`|`f FMOD`|`g FINV`|`h OVER`|`i GETI`|`j FNEG`|`k SPOP`|`l BITC`
|
||||
110-|`m FSUB`|`n GETN`|`o LDOB`|`p FMUL`|`q FDIV`|`r FREM`|`s FADD`|`t SWAP`
|
||||
|
@ -25,11 +37,11 @@ for most operations that take immediates.
|
|||
200-|`+ IADD`|`- ISUB`|`* IMUL`|`/ IDIV`|`% IMOD`|`\ IREM`|`| IABS`|`$ VALU`
|
||||
201-|`@ CALL`|`< BLTZ`|`{ BLEZ`|`= BEQZ`|`} BGEZ`|`> BGTZ`|`, JUMP`|`; RTRN`
|
||||
210-|`& BAND`|`^ BXOR`|`! BNOT`|`[ BSHL`|`] BSHR`|`? CMPI`|`~ CMPF`|`: LABL`
|
||||
211-|`( DPTH`|`) PACK`|`' IMP1`|`" IMP2`|`\` BKPT`|`␣ NOP`|`# COMM`|`¶ BEAT`
|
||||
211-|`( DPTH`|`) PACK`|`' DATA`|`" SECT`|`\` BKPT`|`␣ NOOP`|`# COMM`|`¶ BEAT`
|
||||
[Operations by code][ops-by-code]
|
||||
|
||||
In this table x, y, and z are integers; a, b, and c are floating point numbers;
|
||||
p and q are memory pointers; and f is a function pointer. Additionally
|
||||
p and q are memory pointers; and f is a function pointer. Additionally,
|
||||
→ indicates a basic operation, while ⇒ indicates an operation using an immediate
|
||||
value, and ↔ indicates a meta operation.
|
||||
|
||||
|
@ -64,7 +76,7 @@ P PICK| … x ‥ z ⇒ … x ‥ z x | pick stack element at immediate place
|
|||
Q LDOW| … p ⇒ … x | load word at p plus immediate offset in x
|
||||
R MALL| … x → … p | allocate memory for x bytes at p
|
||||
S ITOS| … x → … x | truncate x to 16 bits, then sign extend
|
||||
T ????| | reserved for future instruction
|
||||
T PUSH| … x ‥ y z ⇒ … z x ‥ y | push top of stack down by immediate places (opposite of ROLL)
|
||||
U CMPU| … x y → … z | compare x to y unsigned and set z such that x o y is z o 0
|
||||
V B_OR| … x y → … z | bitwise OR x and y and store in z
|
||||
W ITOW| … x → … x | truncate x to 32 bits, then sign extend
|
||||
|
@ -125,8 +137,8 @@ $ VALU| … ↔ … * | load constant value
|
|||
\: LABL|… f ↔ … | label code location
|
||||
( DPTH| … → … x | set x to depth of stack (before x)
|
||||
) PACK| … p x → … | pack x elements of stack (before p) into array p
|
||||
' IMP1| | implementation defined reserved operation 1 (no immediate)
|
||||
" IMP2| | implementation defined reserved operation 2 (immediate)
|
||||
' DATA| | embed data; the initializer text runs up to the closing '
|
||||
" SECT| | change section; the section name runs up to the closing "
|
||||
\` BKPT| … ↔ … | trigger breakpoint, or exit if not debugging
|
||||
␣ NOOP| … ↔ … | do nothing, maybe end identifier definition
|
||||
¶ BEAT| … ↔ … | mark a beat for relative branching
|
||||
|
|
26
samples/syntax.atc
Normal file
26
samples/syntax.atc
Normal file
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env arctic
|
||||
|
||||
"data" # read-only memory
|
||||
:value '10' # 1 int
|
||||
:array '[1, 2, 3]' # 3 int array
|
||||
|
||||
"state" # initialized read-write memory
|
||||
:variable '10' # 1 int
|
||||
:buffer '[1, 2, 3]' # 3 int array
|
||||
|
||||
"memory" # uninitialized read-write memory
|
||||
:pool '{10}' # 10 byte pool
|
||||
|
||||
"constants" # ephemeral data
|
||||
:const '10' # 1 universal integer
|
||||
|
||||
"macros" # ephemeral code
|
||||
:foo
|
||||
# body of foo here
|
||||
|
||||
"code" # loaded code
|
||||
:bar
|
||||
# body of bar here
|
||||
|
||||
:_baz # private label, not "exported"
|
||||
# body of baz here
|
155
src/arctic.c
155
src/arctic.c
|
@ -2,66 +2,121 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
int arctic_max_expanded_size(int compressed_size) {
|
||||
/* at most every 6th byte is free */
|
||||
return (6 * compressed_size) / 5 + 1;
|
||||
}
|
||||
/* The ARCTIC code page: 96 characters laid out as six rows of 16, followed by
 * the terminating NUL of the string literal (hence 97 bytes). */
const char ARCTIC_CODE_PAGE[97] =
|
||||
"0123456789ABCDEF"
|
||||
"GHIJKLMNOPQRSTUV"
|
||||
"WXYZabcdefghijkl"
|
||||
"mnopqrstuvwxyz_."
|
||||
"+-*/%\\|$@<{=}>,;"
|
||||
"&^![]?~:()'\"` #\n"
|
||||
;
|
||||
|
||||
int arctic_max_compressed_size(int expanded_size) {
|
||||
return (5 * expanded_size) / 6 + 1;
|
||||
}
|
||||
/* Ops that take an immediate argument. X is applied once to each opcode
 * character, so the list can be expanded into case labels, tables, etc.
 * 'T' (PUSH) is included because it takes an immediate place count. */
#define FOR_IMMEDIATE_OPS(X) \
    X('0') X('G') X('H') X('L') X('M') X('P') X('Q') X('T') X('d') X('o') \
    X('u') X('<') X('{') X('=') X('}') X('>') X(',') X(';') X('[') X(']')
|
||||
|
||||
void arctic_expand(char *expanded, const char *compressed) {
|
||||
// TODO
|
||||
}
|
||||
/* ops without an immediate argument; X is applied once per opcode character */
#define FOR_PLAIN_OPS(X) \
|
||||
X('1') X('2') X('3') X('4') X('5') X('6') X('7') X('8') X('9') X('A') \
|
||||
X('B') X('C') X('D') X('E') X('F') X('I') X('J') X('K') X('N') X('O') \
|
||||
X('R') X('S') X('U') X('V') X('W') X('X') X('Y') X('Z') X('a') X('b') \
|
||||
X('c') X('e') X('f') X('g') X('h') X('i') X('j') X('k') X('l') X('m') \
|
||||
X('n') X('p') X('q') X('r') X('s') X('t') X('v') X('w') X('x') X('y') \
|
||||
X('z') X('_') X('.') X('+') X('-') X('*') X('/') X('%') X('\\') X('|') \
|
||||
X('&') X('^') X('!') X('?') X('(') X(')') X('`')
|
||||
|
||||
void arctic_compress(char *compressed, const char *expanded) {
|
||||
// TODO
|
||||
}
|
||||
#define CASE(X, ...) case X:
|
||||
|
||||
static const char *parse_immediate(const char *expanded, struct ArcticOperation *op) {
|
||||
return expanded; // TODO
|
||||
}
|
||||
/* Append the character chr to the NUL-terminated string held in str, a buffer
 * of n bytes. If no terminator is found inside the buffer, or there is not
 * room for both chr and a new terminator, the buffer is left untouched and the
 * `fail` statement is executed instead (typically a `return <error>`).
 * str, chr and n are each evaluated exactly once. */
#define chrncat(str, chr, n, fail) do { \
    char *chrncat_pos = (str); \
    char *const chrncat_lim = chrncat_pos + (n); \
    /* bounded scan for the current terminator */ \
    while (chrncat_pos < chrncat_lim && *chrncat_pos) { \
        chrncat_pos++; \
    } \
    /* need two free bytes: one for chr, one for the new terminator */ \
    if (chrncat_lim - chrncat_pos < 2) { \
        fail; \
    } else { \
        *chrncat_pos++ = (chr); \
        *chrncat_pos = 0; \
    } \
} while (0)
|
||||
|
||||
void arctic_normalize(struct ArcticOperation *ops, const char *expanded) {
|
||||
const char *o = expanded;
|
||||
struct ArcticOperation *op = ops;
|
||||
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next) {
|
||||
switch (scanner->buf[0]) {
|
||||
case 0: /* initial state */
|
||||
switch (next) {
|
||||
case ' ':
|
||||
return ARCTIC_OK;
|
||||
|
||||
*op = (struct ArcticOperation){ 0 };
|
||||
FOR_IMMEDIATE_OPS(CASE)
|
||||
case '#':
|
||||
case ':':
|
||||
case '\"':
|
||||
case '\'':
|
||||
scanner->buf[0] = next;
|
||||
scanner->buf[1] = 0;
|
||||
return ARCTIC_OK;
|
||||
|
||||
while (*o) {
|
||||
int opcode = *o++;
|
||||
FOR_PLAIN_OPS(CASE)
|
||||
case '\n':
|
||||
scanner->op_callback(next, 0, scanner->data);
|
||||
return ARCTIC_OK;
|
||||
|
||||
switch (*o) {
|
||||
/* ops with an immediate */
|
||||
case '0': case 'G': case 'H': case 'L': case 'M': case 'P':
|
||||
case 'Q': case 'd': case 'o': case 'u': case '<': case '{':
|
||||
case '=': case '}': case '>': case ',': case ';': case '[':
|
||||
case ']': case '"':
|
||||
o = parse_immediate(o, op);
|
||||
default:
|
||||
return ARCTIC_UNEXPECTED_CHAR;
|
||||
}
|
||||
|
||||
/* ops without an immediate */
|
||||
case '1': case '2': case '3': case '4': case '5': case '6':
|
||||
case '7': case '8': case '9': case 'A': case 'B': case 'C':
|
||||
case 'D': case 'E': case 'F': case 'I': case 'J': case 'K':
|
||||
case 'N': case 'O': case 'R': case 'S': case 'U': case 'V':
|
||||
case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b':
|
||||
case 'c': case 'e': case 'f': case 'g': case 'h': case 'i':
|
||||
case 'j': case 'k': case 'l': case 'm': case 'n': case 'p':
|
||||
case 'q': case 'r': case 's': case 't': case 'v': case 'w':
|
||||
case 'x': case 'y': case 'z': case '_': case '.': case '+':
|
||||
case '-': case '*': case '/': case '%': case '\\': case '|':
|
||||
case '&': case '^': case '!': case '?': case '(': case ')':
|
||||
op->opcode = opcode;
|
||||
*++op = (struct ArcticOperation){ 0 };
|
||||
break;
|
||||
case '#': /* comment */
|
||||
if (next == '\n')
|
||||
scanner->buf[0] = 0;
|
||||
return ARCTIC_OK;
|
||||
|
||||
/* unusual ops */
|
||||
case '$': case '@': case ':': case '`': case ' ': case '\n':
|
||||
case '\'':
|
||||
case '"': /* section switch */
|
||||
if (next == '"') {
|
||||
scanner->section_callback(scanner->buf + 1, scanner->data);
|
||||
scanner->buf[0] = 0;
|
||||
} else {
|
||||
chrncat(
|
||||
scanner->buf, next, ARCTIC_BUFSIZE,
|
||||
return ARCTIC_BUFFER_FULL
|
||||
);
|
||||
}
|
||||
return ARCTIC_OK;
|
||||
|
||||
default: /* non-active characters */
|
||||
break;
|
||||
}
|
||||
case ':': /* label name */
|
||||
if (next == ' ') {
|
||||
scanner->label_callback(scanner->buf + 1, scanner->data);
|
||||
scanner->buf[0] = 0;
|
||||
} else {
|
||||
chrncat(
|
||||
scanner->buf, next, ARCTIC_BUFSIZE,
|
||||
return ARCTIC_BUFFER_FULL
|
||||
);
|
||||
}
|
||||
return ARCTIC_OK;
|
||||
|
||||
case '\'': /* data injection */
|
||||
if (next == '\'') {
|
||||
scanner->data_callback(scanner->buf + 1, scanner->data);
|
||||
scanner->buf[0] = 0;
|
||||
} else {
|
||||
chrncat(
|
||||
scanner->buf, next, ARCTIC_BUFSIZE,
|
||||
return ARCTIC_BUFFER_FULL
|
||||
);
|
||||
}
|
||||
return ARCTIC_OK;
|
||||
|
||||
FOR_IMMEDIATE_OPS(CASE) /* immediate ops */
|
||||
if (next == ' ') {
|
||||
scanner->op_callback(scanner->buf[0], scanner->buf + 1, scanner->data);
|
||||
scanner->buf[0] = 0;
|
||||
} else {
|
||||
chrncat(
|
||||
scanner->buf, next, ARCTIC_BUFSIZE,
|
||||
return ARCTIC_BUFFER_FULL
|
||||
);
|
||||
}
|
||||
return ARCTIC_OK;
|
||||
|
||||
default:
|
||||
return ARCTIC_INVALID_STATE;
|
||||
}
|
||||
}
|
||||
|
|
62
src/arctic.h
62
src/arctic.h
|
@ -1,41 +1,59 @@
|
|||
/* ARCTIC library header. To be used as a utility by interpreters and compilers.
|
||||
* Author: Louis A. Burke
|
||||
*
|
||||
* Does not require dynamic memory or a c standard library, so as to be easy to
|
||||
* use on e.g. microcontrollers.
|
||||
*/
|
||||
#ifndef ARCTIC_H
|
||||
#define ARCTIC_H
|
||||
|
||||
#include <stdint.h>
|
||||
#ifndef ARCTIC_BUFSIZE
|
||||
#define ARCTIC_BUFSIZE 1024
|
||||
#endif /* ARCTIC_BUFSIZE */
|
||||
|
||||
/* encoding/decoding */
|
||||
int arctic_max_expanded_size(int compressed_size);
|
||||
int arctic_max_compressed_size(int expanded_size);
|
||||
void arctic_expand(char *expanded, const char *compressed);
|
||||
void arctic_compress(char *compressed, const char *expanded);
|
||||
extern const char ARCTIC_CODE_PAGE[97];
|
||||
|
||||
/* normalization */
|
||||
struct ArcticIdentifier {
|
||||
const char *start; /* a pointer into the input, or null */
|
||||
int length;
|
||||
/* scanning */
|
||||
enum ArcticImmediateKind {
|
||||
ARCTIC_NONE, ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER
|
||||
};
|
||||
|
||||
enum ArcticImmediateKind { ARCTIC_NAME, ARCTIC_INTEGER, ARCTIC_NUMBER };
|
||||
struct ArcticScanner {
|
||||
void *data; /* callback data pointer */
|
||||
|
||||
struct ArcticImmediate {
|
||||
enum ArcticImmediateKind kind;
|
||||
void (*section_callback)(
|
||||
const char *name, /* the name of the section */
|
||||
void *data /* callback data */
|
||||
);
|
||||
|
||||
union {
|
||||
struct ArcticIdentifier name;
|
||||
int64_t integer;
|
||||
double number;
|
||||
};
|
||||
void (*label_callback)(
|
||||
const char *id, /* the identifier itself */
|
||||
void *data /* callback data */
|
||||
);
|
||||
|
||||
void (*op_callback)(
|
||||
char opcode, /* the character code of the operation */
|
||||
const char *im, /* the immediate value, if it exists */
|
||||
void *data /* callback data */
|
||||
);
|
||||
|
||||
void (*data_callback)(
|
||||
const char *init, /* initialization code */
|
||||
void *data /* callback data */
|
||||
);
|
||||
|
||||
char buf[ARCTIC_BUFSIZE];
|
||||
};
|
||||
|
||||
struct ArcticOperation {
|
||||
struct ArcticIdentifier label;
|
||||
struct ArcticImmediate immediate;
|
||||
int opcode; /* won't be beat or nop or comm */
|
||||
enum ArcticErrorCode {
|
||||
ARCTIC_OK = 0,
|
||||
ARCTIC_UNEXPECTED_CHAR, /* not necessarily an error */
|
||||
ARCTIC_INVALID_STATE,
|
||||
ARCTIC_BUFFER_FULL
|
||||
};
|
||||
|
||||
void arctic_normalize(struct ArcticOperation *ops, const char *expanded);
|
||||
/* returns 0 on success, or an error code */
|
||||
enum ArcticErrorCode arctic_scan(struct ArcticScanner *scanner, char next);
|
||||
|
||||
#endif /* ARCTIC_H */
|
||||
|
|
Loading…
Reference in a new issue