Started adding DMs

More progress
Commit before trip
2024-09-16 12:53:44 -04:00 · 2024-06-08 00:43:19 -04:00 · 2024-04-25 00:59:46 -04:00 · 2024-04-12 01:05:15 -04:00 · 2024-03-27 01:26:58 -04:00
4 changed files with 727 additions and 0 deletions
--- a/dictionary.kts
+++ b/dictionary.kts
@ -0,0 +1,80 @@
 #!/usr/bin/kscript
@file:Import("dsl.kt")
 // Declares every glyph of the script. Each `glyph(...)` call gives the glyph
 // category, its spelling, and the visual features that make up the glyph.
 glyphs { // in font encoding and alphabetical order
    // bare vowels
    glyph(vowel, "i", outer)
    glyph(vowel, "e", outer, slashed)
    glyph(vowel, "a", slashed)
    glyph(vowel, "o", `inner`, slashed)
    glyph(vowel, "u", `inner`)
    glyph(vowel, "y", both)
    // consonant clusters (note: both core can be written with a dot in most cases)
    // plain consonants: horizontal position, then vertical position
    glyph(cluster, "θ", left, top)
    glyph(cluster, "∫", left, middle)
    glyph(cluster, "x", left, bottom)
    glyph(cluster, "n", center, middle)
    glyph(cluster, "p", right, top)
    glyph(cluster, "t", right, middle)
    glyph(cluster, "k", right, bottom)
    // consonant + l: same positions, marked `tall`
    glyph(cluster, "θl", left, top, tall)
    glyph(cluster, "∫l", left, middle, tall)
    glyph(cluster, "xl", left, bottom, tall)
    glyph(cluster, "nl", center, middle, tall)
    glyph(cluster, "pl", right, top, tall)
    glyph(cluster, "tl", right, middle, tall)
    glyph(cluster, "kl", right, bottom, tall)
    // consonant + r: same positions, marked `wide`
    glyph(cluster, "θr", left, top, wide)
    glyph(cluster, "∫r", left, middle, wide)
    glyph(cluster, "xr", left, bottom, wide)
    glyph(cluster, "nr", center, middle, wide)
    glyph(cluster, "pr", right, top, wide)
    glyph(cluster, "tr", right, middle, wide)
    glyph(cluster, "kr", right, bottom, wide)
    // s + consonant: same positions, marked `both`
    glyph(cluster, "sθ", left, top, both)
    glyph(cluster, "s∫", left, middle, both)
    glyph(cluster, "sx", left, bottom, both)
    glyph(cluster, "sn", center, middle, both)
    glyph(cluster, "sp", right, top, both)
    glyph(cluster, "st", right, middle, both)
    glyph(cluster, "sk", right, bottom, both)
    // must come after the vowels and clusters it combines
    syllables()
    // punctuation
    glyph(punctuation, "«", left)
    glyph(punctuation, ".")
    glyph(punctuation, "»", right)
    // numerals
    glyph(numeric, "0", circle)
    glyph(numeric, "1", dash)
    glyph(numeric, "2", vee)
    glyph(numeric, "3", hump)
    glyph(numeric, "4", dash, hump)
    glyph(numeric, "5", vee, hump)
    // numeric point: the feature is `dot`, the same name the Nim dictionary uses
    glyph(numeric, ".", dot)
 }
 // Dialects are declared as a name plus a list of replace(original, replacement)
 // spelling substitutions. Both lists are still incomplete (see the `// ...`).
 dialect("jukashenikan") {
    replace("x", "ç")
    replace("p", "j")
    // ...
 }
 // Every replacement listed so far maps a consonant to a voiced-looking
 // counterpart (k→g, ∫→ʒ, s→z, θ→ð, t→d).
 dialect("gazhenigan") {
    replace("k", "g")
    replace("∫", "ʒ")
    replace("s", "z")
    replace("θ", "ð")
    replace("t", "d")
    // ...
 }
 // Placeholder: romanization rules have not been written yet.
 romanization {
    // TODO
 }
 // Placeholder: the word list has not been written yet.
 dictionary {
    // TODO
 }
--- a/dictionary.nim
+++ b/dictionary.nim
@ -0,0 +1,98 @@
 import dsl
 import std/tables
 # Tips: in python run
 #
 # import gensim.downloader
 # model = gensim.downloader.load("glove-wiki-gigaword-50") # or other models @https://github.com/piskvorky/gensim-data
 #
 # Then find words related to what you want in model[] and do math on them.
 # Then search through model to find nearby vectors using model.most_similar.
 # e.g. model.most_similar(model['aunt'] - model['woman'] + model['man'])
 # The language definition, written with the `dictionary` DSL imported from
 # dsl.nim. The DSL leaves the populated value in a variable named `dict`,
 # which the main section at the end of this file prints.
 dictionary:
    # Glyph inventory: each line names a glyph kind, a spelling and the
    # attributes attached to that glyph.
    glyphs:
        # bare vowels
        vowel "i", outer
        vowel "e", outer, slashed
        vowel "a", slashed
        vowel "o", inner, slashed
        vowel "u", inner
        vowel "y", both
        # plain consonants: horizontal position, then vertical position
        cluster "θ", left, top
        cluster "∫", left, middle
        cluster "x", left, bottom
        cluster "n", center, middle
        cluster "p", right, top
        cluster "t", right, middle
        cluster "k", right, bottom
        # consonant + l: same positions, marked `tall`
        cluster "θl", left, top, tall
        cluster "∫l", left, middle, tall
        cluster "xl", left, bottom, tall
        cluster "nl", center, middle, tall
        cluster "pl", right, top, tall
        cluster "tl", right, middle, tall
        cluster "kl", right, bottom, tall
        # consonant + r: same positions, marked `wide`
        cluster "θr", left, top, wide
        cluster "∫r", left, middle, wide
        cluster "xr", left, bottom, wide
        cluster "nr", center, middle, wide
        cluster "pr", right, top, wide
        cluster "tr", right, middle, wide
        cluster "kr", right, bottom, wide
        # s + consonant: same positions, marked `both`
        cluster "sθ", left, top, both
        cluster "s∫", left, middle, both
        cluster "sx", left, bottom, both
        cluster "sn", center, middle, both
        cluster "sp", right, top, both
        cluster "st", right, middle, both
        cluster "sk", right, bottom, both
        # Combines the vowels and clusters declared so far into syllable
        # glyphs, so it has to come after them.
        syllables()
        # punctuation
        punctuation "«", left
        punctuation "."
        punctuation "»", right
        # numerals
        numeric "0", circle
        numeric "1", dash
        numeric "2", vee
        numeric "3", hump
        numeric "4", dash, hump
        numeric "5", vee, hump
        numeric ".", dot
    # Dialects: a name plus a list of spelling replacements (both lists are
    # still incomplete).
    dialect "jukashenikan":
        replace "x", "ç"
        replace "p", "j"
        # ...
    dialect "gazhenigan":
        replace "k", "g"
        replace "∫", "ʒ"
        replace "s", "z"
        replace "θ", "ð"
        replace "t", "d"
        # ...
    # Vocabulary. In `penta` / `icosapenta` spellings a space marks a slot
    # that the DSL fills with each of the vowels i, e, a, o, u.
    words:
        word "t":
            noun "thing", "See t - dmPenta for better meaning."
            # verb "be"
        # Five words nix, nex, nax, nox, nux.
        penta "n x", "Pronouns":
            extremes(i="Fully proximal", u="Fully distal")
            i: pronoun "I/me"
            e: pronoun "this"
            a: pronoun "you"
            o: pronoun "it"
            u: pronoun "that"
        # Two vowel slots; `ii` addresses the entry with i in both of them.
        icosapenta "n t x", "Tests":
            ii: noun "foo"
 # TODO: Consider hepta scales as well? how?
 # When this file is compiled as the main module, print the whole dictionary
 # as JSON on stdout.
 when isMainModule:
    echo dict.jsonify
--- a/dsl.kt
+++ b/dsl.kt
@ -0,0 +1,20 @@
 /** Placeholder for the dictionary database; it has no members yet. */
 class Database
 /**
  * Category of a glyph.
  *
  * @property repr lowercase name of the category.
  */
 enum class GlyphType(val repr: String) {
    Vowel(repr = "vowel"),
    Cluster(repr = "cluster"),
    Syllable(repr = "syllable"),
    Punctuation(repr = "punctuation"),
    Numeric(repr = "numeric");
 }
 /** Base class for glyph features; open so that concrete features can subclass it. */
 open class Feature
 /**
  * A single glyph: its category, its text representation and its features.
  *
  * The features are held in a [List] rather than an array. With an array
  * property the vararg constructor below would have the same JVM signature as
  * the primary constructor, and its `Array<out Feature>` argument would not be
  * accepted where `Array<Feature>` is expected. A list also gives the
  * generated `equals`/`hashCode`/`toString` content-based behaviour.
  *
  * @property type category of the glyph.
  * @property unicode the glyph's spelling.
  * @property features features attached to the glyph, in declaration order.
  */
 data class Glyph(
    val type: GlyphType,
    val unicode: String,
    val features: List<Feature>,
 ) {
    /** Convenience constructor: `Glyph(type, "x", featureA, featureB)`. */
    constructor(t: GlyphType, u: String, vararg f: Feature) : this(t, u, f.toList())
 }
--- a/dsl.nim
+++ b/dsl.nim
@ -0,0 +1,529 @@
 import std/macros
 import std/tables
 import std/options
 import std/strutils
 import std/[json, jsonutils]
 import std/sequtils
 # Data model for the dictionary DSL.
 type
    # Visual features that can be attached to a glyph, grouped by the glyph
    # kind they are meant for.
    GlyphAttribute* = enum
        # Vowel Attributes
        outer, slashed, inner,
        # Cluster/Punctuation Attributes
        left, center, right,
        top, middle, bottom,
        tall, wide, both,
        # Numeric Attributes
        circle, dash, vee, hump, dot,
    # Category of a glyph.
    GlyphKind* = enum Vowel, Cluster, Punctuation, Syllable, Numeric
    # One glyph: its spelling, its category and its set of attributes.
    Glyph* = object
        spelling*: string
        kind*: GlyphKind
        attrs*: set[GlyphAttribute]
    # One dialect rule: `original` is replaced by `replaced`.
    Replacement* = object
        original*: string
        replaced*: string
    PartOfSpeech* = enum Noun, Pronoun, Verb, Adjective, Adverb, Syntactic
    # The five vowel slots used to index pentas (see Vowel2Char for letters).
    V* = enum
        V_I, V_E, V_A, V_O, V_U
    # A definition: a short gloss plus an optional longer explanation.
    Definition* = object
        brief*: string
        long*: Option[string]
    # A word: at most one definition per part of speech, plus the derivations
    # recorded for it.
    Word* = ref object
        spelling*: string
        definitions*: Table[PartOfSpeech, Definition]
        derivations*: seq[Derivation]
    # Optional labels for the `i` end and the `u` end of a penta.
    Extremes* = object
        i*: Option[string]
        u*: Option[string]
    # Five related words, one per vowel in V.
    Penta* = ref object
        spelling*: string
        name*: string
        exts*: Extremes
        elems*: array[V, Word]
    # Extremes for the first and the second vowel slot of an icosapenta.
    BiExtremes* = object
        first*: Extremes
        second*: Extremes
    # A two-slot family: one penta per vowel for the first slot (`first`) and
    # one per vowel for the second slot (`second`).
    Icosapenta* = ref object
        spelling*: string
        name*: string
        exts*: BiExtremes
        first*: array[V, Penta]
        second*: array[V, Penta]
    # A derivational morpheme ("dm") with its own per-part-of-speech
    # definitions.
    DerivationalMorphology* = ref object
        spelling*: string
        definitions*: Table[PartOfSpeech, Definition]
    # Records that a word derives from `root` through `dm`.
    Derivation* = ref object
        root*: Word
        dm*: DerivationalMorphology
    # Everything the DSL collects; each table is keyed by a string (dialect
    # name or spelling).
    Dictionary* = object
        glyphs*: seq[Glyph]
        dialects*: Table[string, seq[Replacement]]
        words*: Table[string, Word]
        pentas*: Table[string, Penta]
        icosapentas*: Table[string, Icosapenta]
        dms*: Table[string, DerivationalMorphology]
 const
    # Lowercase letter written for each vowel slot; the DSL uses it to fill
    # the space placeholders in penta and icosapenta spellings.
    Vowel2Char*: array[V, char] = [
        V_I: 'i',
        V_E: 'e',
        V_A: 'a',
        V_O: 'o',
        V_U: 'u'
    ]
 proc replaceFirst(haystack: string, needle: char, content: char): string =
    ## Returns a copy of `haystack` in which the first occurrence of `needle`
    ## is replaced by `content`.
    ##
    ## Raises `ValueError` when `needle` does not occur in `haystack`.
    ## `find` reports a miss as -1, and indexing with that would fail with an
    ## opaque `IndexDefect`, or write out of bounds when checks are disabled.
    let idx = haystack.find(needle)
    if idx < 0:
        raise newException(ValueError,
            "replaceFirst: '" & needle & "' not found in \"" & haystack & "\"")
    result = haystack
    result[idx] = content
 proc replaceLast(haystack: string, needle: char, content: char): string =
    ## Returns a copy of `haystack` in which the last occurrence of `needle`
    ## is replaced by `content`.
    ##
    ## Raises `ValueError` when `needle` does not occur in `haystack`.
    ## `rfind` reports a miss as -1, and indexing with that would fail with an
    ## opaque `IndexDefect`, or write out of bounds when checks are disabled.
    let idx = haystack.rfind(needle)
    if idx < 0:
        raise newException(ValueError,
            "replaceLast: '" & needle & "' not found in \"" & haystack & "\"")
    result = haystack
    result[idx] = content
 # Generates, for every PartOfSpeech value, two overloads of a proc named after
 # the lowercased value (`noun`, `pronoun`, `verb`, ...): one taking only a
 # short gloss and one taking a short gloss plus a long explanation. Both store
 # a Definition in `w.definitions` under that part of speech, so a variable
 # named `w` with a `definitions` table must be visible at the expansion site.
 macro makeposprocs(): untyped =
    result = nnkStmtList.newNimNode
    for pos in PartOfSpeech:
        # Plain (non-gensym) identifier so it binds to the caller's `w`.
        let w = ident("w")
        let name = ident(($pos).toLowerAscii)
        let vpos = pos.newLit
        result.add quote do:
            proc `name`(short: string) {.used.} =
                `w`.definitions[`vpos`] = Definition(brief: short, long: string.none)
            proc `name`(short: string, long: string) {.used.} =
                `w`.definitions[`vpos`] = Definition(brief: short, long: long.some)
 # Generates one template per vowel in V, named after the vowel's letter
 # (`i`, `e`, `a`, `o`, `u`). Each template runs its body through `edit_word`
 # on `p.elems[<vowel>]`, so both a variable named `p` and an `edit_word`
 # template must be visible at the expansion site.
 macro makepentaprocs(): untyped =
    result = nnkStmtList.newNimNode
    for v in V:
        let p = ident("p")
        # Last character of the lowercased enum name, e.g. V_I -> "i".
        let name = ident("" & ($v).toLowerAscii[^1])
        let val = v.newLit
        result.add quote do:
            template `name`(defns2: untyped) {.used.} =
                edit_word `p`.elems[`val`]: defns2
 # Generates the per-vowel editing templates for an icosapenta. They refer to a
 # variable named `ip` and to the `edit_penta` template, so both must be
 # visible at the expansion site.
 macro make_icosaprocs(): untyped =
    result = nnkStmtList.newNimNode
    let ip = ident("ip")
    # -x and x- templates
    # `<v>x` edits the penta ip.first[<v>]; `x<v>` edits ip.second[<v>].
    for v in V:
        let namex = ident(($v).toLowerAscii[^1] & "x")
        let xname = ident("x" & ($v).toLowerAscii[^1])
        let val = v.newLit
        result.add quote do:
            template `namex`(defns2: untyped) {.used.} =
                edit_penta `ip`.first[`val`]: defns2
            template `xname`(defns2: untyped) {.used.} =
                edit_penta `ip`.second[`val`]: defns2
    # -- templates
    # `<v1><v2>` opens the penta ip.first[<v1>] and applies the body inside
    # that penta's `<v2>` template, i.e. to one word.
    for v1 in V:
        for v2 in V:
            let name = ident(($v1).toLowerAscii[^1] & ($v2).toLowerAscii[^1])
            let val1 = v1.newLit
            let name2 = ident("" & ($v2).toLowerAscii[^1])
            result.add quote do:
                template `name`(defns2: untyped) {.used.} =
                    edit_penta(`ip`.first[`val1`]):
                        `name2`: defns2
 template glyph_dsl(dict: var Dictionary, gbody: untyped) =
    # Runs `gbody` with the glyph commands (`vowel`, `cluster`, `punctuation`,
    # `numeric`, `syllables`) in scope. Every command appends to `dict.glyphs`.
    block:
        proc vowel(spelling: string, attrs: varargs[GlyphAttribute]) =
            # Appends a Vowel glyph carrying the given attributes.
            var flags: set[GlyphAttribute]
            for a in attrs:
                flags.incl a
            dict.glyphs.add Glyph(spelling: spelling, kind: Vowel, attrs: flags)
        proc cluster(spelling: string, attrs: varargs[GlyphAttribute]) =
            # Appends a Cluster glyph carrying the given attributes.
            var flags: set[GlyphAttribute]
            for a in attrs:
                flags.incl a
            dict.glyphs.add Glyph(spelling: spelling, kind: Cluster, attrs: flags)
        proc punctuation(spelling: string, attrs: varargs[GlyphAttribute]) =
            # Appends a Punctuation glyph carrying the given attributes.
            var flags: set[GlyphAttribute]
            for a in attrs:
                flags.incl a
            dict.glyphs.add Glyph(spelling: spelling, kind: Punctuation, attrs: flags)
        proc numeric(spelling: string, attrs: varargs[GlyphAttribute]) =
            # Appends a Numeric glyph carrying the given attributes.
            var flags: set[GlyphAttribute]
            for a in attrs:
                flags.incl a
            dict.glyphs.add Glyph(spelling: spelling, kind: Numeric, attrs: flags)
        proc syllables() =
            # Appends one Syllable glyph for every (vowel, cluster) pair
            # declared so far: spelled cluster-then-vowel, with the union of
            # both attribute sets. Vowels drive the outer loop.
            var combos: seq[Glyph]
            for vow in dict.glyphs:
                if vow.kind == Vowel:
                    for clu in dict.glyphs:
                        if clu.kind == Cluster:
                            combos.add Glyph(
                                spelling: clu.spelling & vow.spelling,
                                kind: Syllable,
                                attrs: vow.attrs + clu.attrs
                            )
            dict.glyphs.add combos
        gbody
 template dialect_dsl(dict: var Dictionary, name: string, dbody: untyped) =
    # Runs `dbody` with a `replace` command in scope, then stores the collected
    # replacements, in call order, under `name` in `dict.dialects`.
    block:
        var rules: seq[Replacement]
        proc replace(orig: string, by: string) =
            # Queues one "orig -> by" rule for this dialect.
            let rule = Replacement(original: orig, replaced: by)
            rules.add rule
        dbody
        dict.dialects[name] = rules
 # Runs `defns` against `theword`: binds it to an injected variable `w` and
 # expands makeposprocs(), so the part-of-speech procs (`noun`, `verb`, ...)
 # called in `defns` write into `theword.definitions`. Word is a ref type, so
 # `w` aliases the same object.
 template edit_word_dsl(theword: Word, defns: untyped) =
    block:
        var w {.inject.}: Word = theword
        makeposprocs()
        defns
 # Same as edit_word_dsl, but for a DerivationalMorphology: binds it to an
 # injected `w` so the part-of-speech procs generated by makeposprocs() write
 # into `thedm.definitions`.
 template edit_dm_dsl(thedm: DerivationalMorphology, defns: untyped) =
    block:
        var w {.inject.}: DerivationalMorphology = thedm
        makeposprocs()
        defns
 # Runs `wbody` with the vocabulary commands in scope: `word`, `penta`,
 # `icosapenta`, `dm` and their `edit_*` counterparts. Everything created is
 # stored in the matching table of `dict`.
 template words_dsl(dict: var Dictionary, wbody: untyped) =
    block:
        # Applies part-of-speech definitions to an existing word.
        template edit_word(theword: Word, defns: untyped) =
            edit_word_dsl(theword):
                defns
        # Creates a word spelled `ortho`, runs `defns` on it and stores it in
        # dict.words under its spelling.
        template word(ortho: string, defns: untyped) =
            block:
                var w {.inject.} = Word(spelling: ortho)
                # Records that this word derives from the existing word `root`
                # through the existing dm `dm`; both are looked up in `dict`
                # with `[]`, so they must already have been declared.
                proc via(root: string, dm: string) {.inject, used.} =
                    w.derivations.add Derivation(root: dict.words[root], dm: dict.dms[dm])
                edit_word(w): defns
                dict.words[w.spelling] = w
        # Runs `defns` against an existing penta, bound to an injected `p`.
        # In scope: `extremes(i, u)` plus the per-vowel templates from
        # makepentaprocs().
        template edit_penta(thepenta: Penta, defns: untyped) =
            block:
                var p {.inject.}: Penta = thepenta
                proc extremes(i {.inject.}: string, u {.inject.}: string) {.used.} =
                    p.exts.i = some(i)
                    p.exts.u = some(u)
                makepentaprocs()
                defns
        # Creates a penta. For each vowel, the first space in `ortho` is
        # replaced by that vowel's letter; the word with the resulting
        # spelling is reused when dict.words already has it and created
        # otherwise. The penta is stored in dict.pentas under `ortho`.
        template penta(ortho: string, pname: string, defns: untyped) =
            block:
                var p {.inject.}: Penta = Penta(
                    spelling: ortho,
                    name: pname,
                    exts: Extremes(i: string.none, u: string.none)
                )
                for v in V:
                    let spelling = ortho.replaceFirst(' ', Vowel2Char[v])
                    if spelling in dict.words:
                        p.elems[v] = dict.words[spelling]
                    else:
                        word ortho.replaceFirst(' ', Vowel2Char[v]):
                            p.elems[v] = w
                edit_penta(p): defns
                dict.pentas[ortho] = p
        # Runs `idefns` against an existing icosapenta, bound to an injected
        # `ip`. In scope: `firsts(i, u)` / `seconds(i, u)` for the two sets of
        # extremes, and the per-vowel penta editors.
        template edit_icosapenta(theicosa: Icosapenta, idefns: untyped) =
            block:
                var ip {.inject.}: Icosapenta = theicosa
                proc firsts(i {.inject.}: string, u {.inject.}: string) {.used.} =
                    ip.exts.first.i = some(i)
                    ip.exts.first.u = some(u)
                proc seconds(i {.inject.}: string, u {.inject.}: string) {.used.} =
                    ip.exts.second.i = some(i)
                    ip.exts.second.u = some(u)
                # `<v>x` edits the penta for vowel <v> in the first slot.
                template ix(defns2: untyped) {.used.} =
                    edit_penta ip.first[V_I]: defns2
                template ex(defns2: untyped) {.used.} =
                    edit_penta ip.first[V_E]: defns2
                template ax(defns2: untyped) {.used.} =
                    edit_penta ip.first[V_A]: defns2
                template ox(defns2: untyped) {.used.} =
                    edit_penta ip.first[V_O]: defns2
                template ux(defns2: untyped) {.used.} =
                    edit_penta ip.first[V_U]: defns2
                # `x<v>` edits the penta for vowel <v> in the second slot.
                template xi(defns2: untyped) {.used.} =
                    edit_penta ip.second[V_I]: defns2
                template xe(defns2: untyped) {.used.} =
                    edit_penta ip.second[V_E]: defns2
                template xa(defns2: untyped) {.used.} =
                    edit_penta ip.second[V_A]: defns2
                template xo(defns2: untyped) {.used.} =
                    edit_penta ip.second[V_O]: defns2
                template xu(defns2: untyped) {.used.} =
                    edit_penta ip.second[V_U]: defns2
                # NOTE(review): make_icosaprocs() also emits templates named
                # ix..ux and xi..xu, the same names as the hand-written ones
                # above - confirm whether the duplication is intended.
                make_icosaprocs()
                # NOTE: first extremes is exts, second extremes is elems[*].exts
                idefns
        # Creates an icosapenta. For each vowel, one penta is built with the
        # FIRST space of `iortho` replaced by the vowel's letter (stored in
        # ip.first) and one with the LAST space replaced (stored in
        # ip.second). The icosapenta is stored in dict.icosapentas under
        # `iortho`.
        template icosapenta(iortho: string, iname: string, idefns: untyped) =
            block:
                var ip {.inject.}: Icosapenta = Icosapenta(
                    spelling: iortho,
                    name: iname,
                    exts: BiExtremes(
                        first: Extremes(i: string.none, u: string.none),
                        second: Extremes(i: string.none, u: string.none)
                    )
                )
                for v in V:
                    penta iortho.replaceFirst(' ', Vowel2Char[v]), Vowel2Char[v] & "x " & iname:
                        ip.first[v] = p
                    penta iortho.replaceLast(' ', Vowel2Char[v]), "x" & Vowel2Char[v] & " " & iname:
                        ip.second[v] = p
                edit_icosapenta(ip): idefns
                dict.icosapentas[iortho] = ip
        # Applies part-of-speech definitions to an existing dm.
        template edit_dm(thedm: DerivationalMorphology, dmbody: untyped) =
            edit_dm_dsl(thedm):
                dmbody
        # Creates a derivational morpheme spelled `ortho`, runs `defns` on it
        # and stores it in dict.dms under `ortho`.
        template dm(ortho: string, defns: untyped) =
            block:
                var w {.inject.} = DerivationalMorphology(spelling: ortho)
                edit_dm(w): defns
                dict.dms[ortho] = w
        wbody
 # Entry point of the DSL. Declares an injected variable `dict` in the caller's
 # scope, makes the `glyphs`, `dialect` and `words` sections available, and
 # runs `body`. After the template, `dict` holds everything `body` declared.
 template dictionary*(body: untyped) =
    var dict {.inject.}: Dictionary
    dict.glyphs = @[]
    # `glyphs:` section - glyph declarations (see glyph_dsl).
    template glyphs(gbody: untyped) =
        glyph_dsl(dict):
            gbody
    # `dialect "name":` section - spelling replacements (see dialect_dsl).
    template dialect(name: string, dbody: untyped) =
        dialect_dsl(dict, name):
            dbody
    # TODO: add a words: proc to create and return a dm, so that within words
    # you can use it
    # e.g.
    #
    # words:
    #   let foo = dm "example-":
    #       noun "does something to nouns"
    #
    #   word "example1":
    #       noun "does something once to nouns"
    #       by "1" via foo
    #       # or maybe?
    #       foo["1"]
    # `words:` section - words, pentas, icosapentas and dms (see words_dsl).
    template words(wbody: untyped) =
        words_dsl(dict):
            wbody
    body
 proc toJsonHook(attrs: set[GlyphAttribute]): JsonNode =
    ## Serializes an attribute set as one JSON string: the attribute names in
    ## the set's iteration order, joined with "-".
    var names: seq[string]
    for attr in attrs:
        names.add $attr
    result = newJString(names.join("-"))
 proc toJsonHook(glyphs: seq[Glyph]): JsonNode =
    ## Serializes the glyph list as an object keyed by lowercased glyph kind;
    ## each kind maps spelling -> serialized attribute set.
    result = newJObject()
    for g in glyphs:
        let bucket = toLowerAscii($g.kind)
        # Create the per-kind object the first time that kind is seen.
        if not (bucket in result):
            result[bucket] = newJObject()
        result[bucket][g.spelling] = g.attrs.toJson
 proc toJsonHook(defns: Table[PartOfSpeech, Definition]): JsonNode =
    ## Serializes a definitions table as an object keyed by the lowercased
    ## part-of-speech name.
    result = newJObject()
    for partOfSpeech, definition in defns.pairs:
        let key = toLowerAscii($partOfSpeech)
        result[key] = definition.toJson
 proc toJsonHook(words: seq[Word]): JsonNode =
    ## Serializes a word list as an object mapping each word's spelling to its
    ## serialized definitions.
    result = newJObject()
    for entry in words:
        let definitions = entry.definitions.toJson
        result[entry.spelling] = definitions
 proc toJsonHook(exts: Extremes): JsonNode =
    ## Serializes the extremes as an object with an "i" and/or "u" key; a key
    ## is present only when the corresponding label is set.
    result = newJObject()
    let labelled = [("i", exts.i), ("u", exts.u)]
    for entry in labelled:
        if entry[1].isSome:
            result[entry[0]] = newJString(entry[1].get)
 proc toJson(penta: Penta): JsonNode =
    ## Summarizes a penta as {"extremes": ..., "name": ...}; the member words
    ## are not included.
    let extremes = penta.exts.toJson
    let title = newJString(penta.name)
    result = newJObject()
    result["extremes"] = extremes
    result["name"] = title
 proc toJson(pentas: seq[Penta]): JsonNode =
    ## Serializes a list of pentas as an object keyed by penta spelling.
    result = newJObject()
    for idx in 0 ..< pentas.len:
        let current = pentas[idx]
        result[current.spelling] = current.toJson
 proc toJson(exts: BiExtremes): JsonNode =
    ## Serializes both sets of extremes as one flat object: "ix"/"ux" for the
    ## first slot and "xi"/"xu" for the second. A key is present only when
    ## the corresponding label is set.
    result = newJObject()
    let labelled = [
        ("ix", exts.first.i),
        ("ux", exts.first.u),
        ("xi", exts.second.i),
        ("xu", exts.second.u)
    ]
    for entry in labelled:
        if entry[1].isSome:
            result[entry[0]] = newJString(entry[1].get)
 proc toJson(icosa: Icosapenta): JsonNode =
    ## Summarizes an icosapenta as {"extremes": ..., "name": ...}; the member
    ## pentas are not included.
    let extremes = icosa.exts.toJson
    let title = newJString(icosa.name)
    result = newJObject()
    result["extremes"] = extremes
    result["name"] = title
 proc toJson(icosas: seq[Icosapenta]): JsonNode =
    ## Serializes a list of icosapentas as an object keyed by spelling.
    result = newJObject()
    for idx in 0 ..< icosas.len:
        let current = icosas[idx]
        result[current.spelling] = current.toJson
 proc toJsonHook(dict: Dictionary): JsonNode =
    ## Serializes the dictionary as an object with the keys "glyphs",
    ## "dialects", "words", "pentas" and "icosas", in that order. `dict.dms`
    ## is not part of the output.
    # NOTE(review): pentas and icosapentas are passed on as whole tables;
    # confirm that the table serializer picks up the hand-written
    # toJson(Penta) / toJson(Icosapenta) procs rather than a generic dump.
    let wordList = dict.words.values.toSeq
    result = newJObject()
    result["glyphs"] = toJson(dict.glyphs)
    result["dialects"] = toJson(dict.dialects)
    result["words"] = toJson(wordList)
    result["pentas"] = toJson(dict.pentas)
    result["icosas"] = toJson(dict.icosapentas)
 proc jsonify*(dict: Dictionary): string =
    ## Returns the dictionary rendered as JSON text.
    let node = dict.toJson
    result = $node
 proc `$`*(word: Word): string =
    ## Debug rendering of a word: its spelling, followed by
    ## "(Pos:brief,Pos:brief,...)" when it has at least one definition.
    result = word.spelling
    if word.definitions.len > 0:
        var parts: seq[string]
        for partOfSpeech, definition in word.definitions.pairs:
            parts.add $partOfSpeech & ":" & definition.brief
        result.add "(" & parts.join(",") & ")"
 # Smoke test / usage example: when dsl.nim itself is compiled as the main
 # module, build a small dictionary that uses every DSL section and print it
 # as JSON.
 when isMainModule:
    dictionary:
        glyphs:
            vowel "i", outer
            vowel "y", both
            cluster "θ", left, top
            cluster "∫", left, middle
            # combines the two vowels and two clusters declared above
            syllables()
            punctuation "«", left
            numeric "0", circle
        dialect "jukashenikan":
            replace "x", "ç"
        dialect "gazhenigan":
            replace "k", "g"
        words:
            # Declared before "foot" below, which looks it up through `via`.
            dm "foo-":
                noun "fooish"
            word "t":
                noun "thing", "See t - dmPenta for better meaning."
                # verb "be"
            penta "n x", "Pronouns":
                extremes(i="Fully proximal", u="Fully distal")
                i: pronoun "I/me"
            icosapenta "n t t", "Tests":
                ii: verb "foo"
            word "foot":
                noun "fooish thing"
                # derive from the word "t" through the dm "foo-"
                via("t", "foo-")
                # "t".via "foo-"
    echo dict.jsonify
Author	SHA1	Message	Date
Louis Burke	c95ff6e188	Started adding DMs	2024-09-16 12:53:44 -04:00
Louis Burke	12b9a7a1be	More progress	2024-06-08 00:43:19 -04:00
Louis Burke	63d7e13e11	Commit before trip	2024-04-25 00:59:46 -04:00
Louis Burke	2a4a29c051	Good progress in nim	2024-04-12 01:05:15 -04:00
Louis Burke	3219555785	Trying out kotlin and nim	2024-03-27 01:26:58 -04:00