Commit before trip

Louis Burke 2024-04-25 00:59:46 -04:00
parent 2a4a29c051
commit 63d7e13e11
2 changed files with 115 additions and 26 deletions

@@ -3,10 +3,11 @@ import dsl
# Tips: in python run
#
# import gensim.downloader
# model = gensim.downloader.load("glove-wiki-gigaword-50")
# model = gensim.downloader.load("glove-wiki-gigaword-50") # or other models @https://github.com/piskvorky/gensim-data
#
# Then find words related to what you want in model[] and do math on them.
# Then search through model to find nearby vectors.
# Then search through model to find nearby vectors using model.most_similar.
# e.g. model.most_similar(model['aunt'] - model['woman'] + model['man'])
dictionary:
glyphs:
@@ -87,6 +88,7 @@ dictionary:
o: pronoun "it"
u: pronoun "that"
# TODO: Consider hepta scales as well? how?
when isMainModule:
echo dict.toJSON
echo dict.jsonify

dsl.nim

@@ -2,6 +2,8 @@ import std/macros
import std/tables
import std/options
import std/strutils
import std/[json, jsonutils]
import std/sequtils
type
GlyphAttribute* = enum
@@ -29,11 +31,14 @@ type
PartOfSpeech* = enum Noun, Pronoun, Verb, Adjective, Adverb, Syntactic
V* = enum
V_I, V_E, V_A, V_O, V_U
Definition* = object
brief*: string
long*: Option[string]
Word* = object
Word* = ref object
spelling*: string
definitions*: Table[PartOfSpeech, Definition]
@@ -41,21 +46,27 @@ type
i*: Option[string]
u*: Option[string]
Penta*[T] = object
Penta*[T] = ref object
spelling*: string
name*: string
exts*: Extremes
i*: ptr T
e*: ptr T
a*: ptr T
o*: ptr T
u*: ptr T
elems*: array[V, T]
Dictionary* = object
glyphs*: seq[Glyph]
dialects*: Table[string, seq[Replacement]]
words*: seq[Word]
pentas*: seq[Penta[Word]]
icosipentas*: seq[Penta[Penta[Word]]]
const
Vowel2Char*: array[V, char] = [
V_I: 'i',
V_E: 'e',
V_A: 'a',
V_O: 'o',
V_U: 'u'
]
proc replaceFirst(haystack: string, needle: char, content: char): string =
let idx = haystack.find(needle)
@@ -142,7 +153,7 @@ template dictionary*(body: untyped) =
block:
template word(ortho: string, defns: untyped) =
block:
var w {.inject.}: Word = Word(spelling: ortho)
var w {.inject.} = Word(spelling: ortho)
makeposprocs()
@@ -150,41 +161,117 @@ template dictionary*(body: untyped) =
dict.words.add w
template edit_word(theword: Word, defns: untyped) =
block:
var w {.inject.}: Word = theword
makeposprocs()
defns
template penta(ortho: string, pname: string, defns: untyped) =
block:
var p {.inject.}: Penta[Word]
p.spelling = ortho
var p {.inject.}: Penta[Word] = Penta[Word](
spelling: ortho,
name: pname,
exts: Extremes(i: string.none, u: string.none)
)
for v in V:
word ortho.replaceFirst(' ', Vowel2Char[v]):
p.elems[v] = w
proc extremes(i {.inject.}: string, u {.inject.}: string) {.used.} =
p.exts.i = some(i)
p.exts.u = some(u)
template i(defns2: untyped) {.used.} =
word ortho.replaceFirst(' ', 'i'): defns2; p.i = w.addr
edit_word p.elems[V_I]: defns2
template e(defns2: untyped) {.used.} =
word ortho.replaceFirst(' ', 'e'): defns2; p.e = w.addr
edit_word p.elems[V_E]: defns2
template a(defns2: untyped) {.used.} =
word ortho.replaceFirst(' ', 'a'): defns2; p.a = w.addr
edit_word p.elems[V_A]: defns2
template o(defns2: untyped) {.used.} =
word ortho.replaceFirst(' ', 'o'): defns2; p.o = w.addr
edit_word p.elems[V_O]: defns2
template u(defns2: untyped) {.used.} =
word ortho.replaceFirst(' ', 'u'): defns2; p.u = w.addr
p.spelling = ortho
p.name = pname
edit_word p.elems[V_U]: defns2
defns
dict.pentas.add p
template icosipenta(iortho: string, iname: string, idefns: untyped) =
block:
var i {.inject.}: Penta[Penta[Word]] = Penta[Penta[Word]](
spelling: iortho,
name: iname,
exts: Extremes(i: string.none, u: string.none)
)
for v in V:
penta iortho.replaceFirst(' ', Vowel2Char[v]), Vowel2Char[v] & "x " & iname:
i.elems[v] = p
# NOTE: first extremes is exts, second extremes is elems[*].exts
wbody
# TODO: penta[X]s
body
proc toJSON*(dict: Dictionary): string =
return $dict
# TODO: stringify to JSON
proc toJsonHook(attrs: set[GlyphAttribute]): JsonNode =
newJString(attrs.toSeq.join("-"))
proc toJsonHook(glyphs: seq[Glyph]): JsonNode =
result = newJObject()
for glyph in glyphs:
let kindname = ($glyph.kind).toLowerAscii
if kindname notin result:
result[kindname] = newJObject()
result[kindname][glyph.spelling] = glyph.attrs.toJson
proc toJsonHook(defns: Table[PartOfSpeech, Definition]): JsonNode =
result = newJObject()
for pos, defn in defns:
result[($pos).toLowerAscii] = defn.toJson
proc toJsonHook(words: seq[Word]): JsonNode =
result = newJObject()
for word in words:
result[word.spelling] = word.definitions.toJson
proc toJsonHook(exts: Extremes): JsonNode =
result = newJObject()
if exts.i.isSome:
result["i"] = newJString(exts.i.get)
if exts.u.isSome:
result["u"] = newJString(exts.u.get)
proc toJson(penta: Penta[Word], dict: Dictionary): JsonNode =
result = newJObject()
result["extremes"] = penta.exts.toJson
result["name"] = newJString(penta.name)
# TODO: Consider looking up i/e/a/o/u in dict
proc toJson(pentas: seq[Penta[Word]], dict: Dictionary): JsonNode =
result = newJObject()
for penta in pentas:
result[penta.spelling] = penta.toJson(dict)
proc toJsonHook(dict: Dictionary): JsonNode =
result = newJObject()
result["glyphs"] = dict.glyphs.toJson
result["dialects"] = dict.dialects.toJson
result["words"] = dict.words.toJson
result["pentas"] = dict.pentas.toJson(dict)
proc jsonify*(dict: Dictionary): string =
return $(dict.toJson)
when isMainModule:
dictionary:
@@ -217,4 +304,4 @@ when isMainModule:
i: pronoun "I/me"
echo dict.toJSON
echo dict.jsonify
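
Note (not part of the commit): the new serialization path relies on std/jsonutils, whose toJson picks up a user-supplied toJsonHook overload for each type it serializes, which is why the hooks added above need no explicit registration. A minimal standalone sketch of that mechanism, reusing the commit's Extremes shape as the example type:

import std/[json, jsonutils, options]

type Extremes = object
  i, u: Option[string]

# jsonutils prefers this overload over its default object serialization,
# so optional fields are emitted only when present.
proc toJsonHook(exts: Extremes): JsonNode =
  result = newJObject()
  if exts.i.isSome: result["i"] = newJString(exts.i.get)
  if exts.u.isSome: result["u"] = newJString(exts.u.get)

when isMainModule:
  echo Extremes(i: some("low"), u: string.none).toJson  # prints {"i":"low"}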