From 63d7e13e1128830ca5964f38f0578594569048d5 Mon Sep 17 00:00:00 2001 From: Louis Burke Date: Thu, 25 Apr 2024 00:59:46 -0400 Subject: [PATCH] Commit before trip --- dictionary.nim | 8 +-- dsl.nim | 133 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 115 insertions(+), 26 deletions(-) diff --git a/dictionary.nim b/dictionary.nim index a4a5dfc..3566bc8 100644 --- a/dictionary.nim +++ b/dictionary.nim @@ -3,10 +3,11 @@ import dsl # Tips: in python run # # import gensim.downloader -# model = gensim.downloader.load("glove-wiki-gigaword-50") +# model = gensim.downloader.load("glove-wiki-gigaword-50") # or other models @https://github.com/piskvorky/gensim-data # # Then find words related to what you want in model[] and do math on them. -# Then search through model to find nearby vectors. +# Then search through model to find nearby vectors using model.most_similar. +# e.g. model.most_similar(model['aunt'] - model['woman'] + model['man']) dictionary: glyphs: @@ -87,6 +88,7 @@ dictionary: o: pronoun "it" u: pronoun "that" +# TODO: Consider hepta scales as well? how? when isMainModule: - echo dict.toJSON + echo dict.jsonify diff --git a/dsl.nim b/dsl.nim index 2525397..53d745a 100644 --- a/dsl.nim +++ b/dsl.nim @@ -2,6 +2,8 @@ import std/macros import std/tables import std/options import std/strutils +import std/[json, jsonutils] +import std/sequtils type GlyphAttribute* = enum @@ -29,11 +31,14 @@ type PartOfSpeech* = enum Noun, Pronoun, Verb, Adjective, Adverb, Syntactic + V* = enum + V_I, V_E, V_A, V_O, V_U + Definition* = object brief*: string long*: Option[string] - Word* = object + Word* = ref object spelling*: string definitions*: Table[PartOfSpeech, Definition] @@ -41,21 +46,27 @@ type i*: Option[string] u*: Option[string] - Penta*[T] = object + Penta*[T] = ref object spelling*: string name*: string exts*: Extremes - i*: ptr T - e*: ptr T - a*: ptr T - o*: ptr T - u*: ptr T + elems*: array[V, T] Dictionary* = object glyphs*: seq[Glyph] dialects*: Table[string, seq[Replacement]] words*: seq[Word] pentas*: seq[Penta[Word]] + icosipentas*: seq[Penta[Penta[Word]]] + +const + Vowel2Char*: array[V, char] = [ + V_I: 'i', + V_E: 'e', + V_A: 'a', + V_O: 'o', + V_U: 'u' + ] proc replaceFirst(haystack: string, needle: char, content: char): string = let idx = haystack.find(needle) @@ -142,7 +153,7 @@ template dictionary*(body: untyped) = block: template word(ortho: string, defns: untyped) = block: - var w {.inject.}: Word = Word(spelling: ortho) + var w {.inject.} = Word(spelling: ortho) makeposprocs() @@ -150,41 +161,117 @@ template dictionary*(body: untyped) = dict.words.add w + template edit_word(theword: Word, defns: untyped) = + block: + var w {.inject.}: Word = theword + + makeposprocs() + + defns + template penta(ortho: string, pname: string, defns: untyped) = block: - var p {.inject.}: Penta[Word] - p.spelling = ortho + var p {.inject.}: Penta[Word] = Penta[Word]( + spelling: ortho, + name: pname, + exts: Extremes(i: string.none, u: string.none) + ) + for v in V: + word ortho.replaceFirst(' ', Vowel2Char[v]): + p.elems[v] = w proc extremes(i {.inject.}: string, u {.inject.}: string) {.used.} = p.exts.i = some(i) p.exts.u = some(u) template i(defns2: untyped) {.used.} = - word ortho.replaceFirst(' ', 'i'): defns2; p.i = w.addr + edit_word p.elems[V_I]: defns2 template e(defns2: untyped) {.used.} = - word ortho.replaceFirst(' ', 'e'): defns2; p.e = w.addr + edit_word p.elems[V_E]: defns2 template a(defns2: untyped) {.used.} = - word ortho.replaceFirst(' ', 'a'): defns2; p.a = w.addr + edit_word p.elems[V_A]: defns2 template o(defns2: untyped) {.used.} = - word ortho.replaceFirst(' ', 'o'): defns2; p.o = w.addr + edit_word p.elems[V_O]: defns2 template u(defns2: untyped) {.used.} = - word ortho.replaceFirst(' ', 'u'): defns2; p.u = w.addr - - p.spelling = ortho - p.name = pname + edit_word p.elems[V_U]: defns2 defns dict.pentas.add p + template icosipenta(iortho: string, iname: string, idefns: untyped) = + block: + var i {.inject.}: Penta[Penta[Word]] = Penta[Penta[Word]]( + spelling: iortho, + name: iname, + exts: Extremes(i: string.none, u: string.none) + ) + for v in V: + penta iortho.replaceFirst(' ', Vowel2Char[v]), Vowel2Char[v] & "x " & iname: + i.elems[v] = p + + + # NOTE: first extremes is exts, second extremes is elems[*].exts + + wbody - # TODO: penta[X]s body -proc toJSON*(dict: Dictionary): string = - return $dict - # TODO: stringify to JSON +proc toJsonHook(attrs: set[GlyphAttribute]): JsonNode = + newJString(attrs.toSeq.join("-")) + +proc toJsonHook(glyphs: seq[Glyph]): JsonNode = + result = newJObject() + for glyph in glyphs: + let kindname = ($glyph.kind).toLowerAscii + if kindname notin result: + result[kindname] = newJObject() + result[kindname][glyph.spelling] = glyph.attrs.toJson + +proc toJsonHook(defns: Table[PartOfSpeech, Definition]): JsonNode = + result = newJObject() + for pos, defn in defns: + result[($pos).toLowerAscii] = defn.toJson + +proc toJsonHook(words: seq[Word]): JsonNode = + result = newJObject() + for word in words: + result[word.spelling] = word.definitions.toJson + +proc toJsonHook(exts: Extremes): JsonNode = + result = newJObject() + if exts.i.isSome: + result["i"] = newJString(exts.i.get) + if exts.u.isSome: + result["u"] = newJString(exts.u.get) + + +proc toJson(penta: Penta[Word], dict: Dictionary): JsonNode = + result = newJObject() + + result["extremes"] = penta.exts.toJson + result["name"] = newJString(penta.name) + # TODO: Consider looking up i/e/a/o/u in dict + + +proc toJson(pentas: seq[Penta[Word]], dict: Dictionary): JsonNode = + result = newJObject() + + for penta in pentas: + result[penta.spelling] = penta.toJson(dict) + + +proc toJsonHook(dict: Dictionary): JsonNode = + result = newJObject() + + result["glyphs"] = dict.glyphs.toJson + result["dialects"] = dict.dialects.toJson + result["words"] = dict.words.toJson + result["pentas"] = dict.pentas.toJson(dict) + +proc jsonify*(dict: Dictionary): string = + return $(dict.toJson) when isMainModule: dictionary: @@ -217,4 +304,4 @@ when isMainModule: i: pronoun "I/me" - echo dict.toJSON + echo dict.jsonify